diff --git a/Desktop/open-source/R/.github/CODEOWNERS b/Desktop/open-source/R/.github/CODEOWNERS
new file mode 100644
index 00000000..076e386d
--- /dev/null
+++ b/Desktop/open-source/R/.github/CODEOWNERS
@@ -0,0 +1,4 @@
+* @siriak @acylam @eshom
+*.md @Panquesito7
+/.github @Panquesito7
+LICENSE @Panquesito7
diff --git a/Desktop/open-source/R/.github/copilot-instructions.md b/Desktop/open-source/R/.github/copilot-instructions.md
new file mode 100644
index 00000000..d39c1cd9
--- /dev/null
+++ b/Desktop/open-source/R/.github/copilot-instructions.md
@@ -0,0 +1,59 @@
+# Copilot Instructions for TheAlgorithms/R
+
+## General Guidelines
+
+This repository contains implementations of various algorithms in R. All contributions should follow these guidelines to maintain code quality and consistency.
+
+## Code Quality & Functionality
+
+- Ensure that your code is functional and well-structured before submitting
+- The code should run without errors in an R environment and produce the expected output
+- Follow best practices for efficiency, readability, and maintainability
+- Use consistent and meaningful variable names (use `.` or `_` to separate words, e.g., `results.df` for a data frame)
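+
+For example, both of the following follow the convention; pick one style and use it consistently within a file:
+
+```r
+results.df <- data.frame(id = 1:3, score = c(0.2, 0.5, 0.9))   # dot-separated
+results_df <- data.frame(id = 1:3, score = c(0.2, 0.5, 0.9))   # underscore-separated
+```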
+
+## Adding New Algorithms
+
+When adding a new algorithm:
+- **Verify that the algorithm is not already implemented** in the repository (including under a different name)
+- **Confirm that the proposed algorithm is a recognized computer-science algorithm**, not a problem-specific adaptation of a general technique (e.g., tuned for LeetCode or other competitive-programming problems)
+- Include a brief explanation of the algorithm in the file as comments
+- Add an example showcasing its usage (can be commented within the script)
+- **Update DIRECTORY.md** to include the new algorithm in the appropriate section
+
+## Modifying Existing Algorithms
+
+When modifying existing algorithms:
+- Clearly document the changes in your pull request description
+- Ensure that your modifications do not break existing functionality
+- If applicable, update or add test cases to validate your changes
+
+## File Naming & Structure Conventions
+
+- **All code file names must use lowercase `.r` extension** (not `.R`)
+- Ensure that filenames follow the existing directory structure and naming patterns
+- Files should be placed in the appropriate category directory (e.g., `sorting_algorithms/`, `graph_algorithms/`, `mathematics/`)
+
+## Documentation & Comments
+
+- Provide clear and concise documentation in the form of comments within the code
+- Add a brief docstring at the beginning of the script explaining (see the example below):
+ - What the algorithm does
+ - The expected input and output
+ - Any dependencies required
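+
+A minimal sketch of such a header and layout (the algorithm shown here is only illustrative):
+
+```r
+# Bubble Sort
+# Repeatedly swaps adjacent out-of-order elements until the vector is sorted.
+# Input:  a numeric vector
+# Output: the same values sorted in ascending order
+# Dependencies: none (base R only)
+bubble.sort <- function(elements.vec) {
+  n <- length(elements.vec)
+  if (n < 2) {
+    return(elements.vec)
+  }
+  for (i in seq_len(n - 1)) {
+    for (j in seq_len(n - i)) {
+      if (elements.vec[j] > elements.vec[j + 1]) {
+        elements.vec[c(j, j + 1)] <- elements.vec[c(j + 1, j)]
+      }
+    }
+  }
+  elements.vec
+}
+
+# Example usage (kept as a comment, as suggested above):
+# bubble.sort(c(5, 2, 4, 1))  # returns 1 2 4 5
+```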
+
+## Testing & Verification
+
+Before submitting a pull request, verify that your code:
+- Runs correctly with different test cases
+- Does not produce unnecessary warnings or errors
+- Is accompanied, where applicable, by a test file demonstrating the algorithm's correctness (see the sketch below)
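+
+One possible shape for such a test file, using only base R assertions (the path and function name below are placeholders, not references to a specific existing file):
+
+```r
+# Illustrative test script; adjust the source() path and function name to the
+# algorithm actually being tested.
+source("sorting_algorithms/bubble_sort.r")
+
+stopifnot(identical(bubble.sort(c(5, 2, 4, 1)), c(1, 2, 4, 5)))
+stopifnot(identical(bubble.sort(c(3)), c(3)))
+stopifnot(length(bubble.sort(numeric(0))) == 0)
+cat("All tests passed.\n")
+```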
+
+## Pull Request Review Checklist
+
+When reviewing a pull request:
+- Verify that any added algorithms or data structures aren't already implemented elsewhere in the repository (including under a different name)
+- Confirm that the proposed algorithm is a recognized computer-science algorithm, not a problem-specific adaptation of a general technique (e.g., tuned for LeetCode or other competitive-programming problems)
+- Check that the extension of all code file names is a lowercase `.r`
+- Check that DIRECTORY.md was updated correctly
+- Verify that the code includes appropriate documentation and examples
+- Ensure that variable naming follows repository conventions
diff --git a/Desktop/open-source/R/.github/scripts/doc_builder.r b/Desktop/open-source/R/.github/scripts/doc_builder.r
new file mode 100644
index 00000000..0b0d2b3e
--- /dev/null
+++ b/Desktop/open-source/R/.github/scripts/doc_builder.r
@@ -0,0 +1,32 @@
+# Taken from https://stackoverflow.com/a/4749909 and slightly edited. Thanks!
+list_dirs <- function(path=".", pattern=NULL, all.dirs=FALSE,
+ full.names=FALSE, ignore.case=FALSE) {
+
+ all <- list.files(path, pattern, all.dirs,
+ full.names=TRUE, recursive=FALSE, ignore.case)
+ dirs <- all[file.info(all)$isdir]
+
+ if(isTRUE(full.names))
+ return(dirs)
+ else
+ return(basename(dirs))
+}
+
+cat("R process started.\n")
+cat("Change working directory to documentation directory\n")
+setwd("documentation")
+
+cat("Creating the directory list\n")
+dirlist <- list_dirs(path="..", pattern=".R", ignore.case = TRUE, full.names = TRUE)
+
+cat("Getting a list of R scripts from the algorithm directories.\n")
+scriptlist <- lapply(dirlist, list.files, ".R", ignore.case = TRUE, full.names = TRUE)
+cat("Removing from the list empty directories.\n")
+scriptlist <- scriptlist[!sapply(scriptlist, identical, character(0))]
+print(unlist(scriptlist))
+
+cat("Compiling documentation from scripts.\n")
+invisible(lapply(unlist(scriptlist), function(x) tryCatch(knitr::spin(x),
+ error = function(e) message("Error compiling: ", e))))
+
+cat("R process done.\n")
diff --git a/Desktop/open-source/R/.github/workflows/directory_workflow.yml b/Desktop/open-source/R/.github/workflows/directory_workflow.yml
new file mode 100644
index 00000000..b9a157d7
--- /dev/null
+++ b/Desktop/open-source/R/.github/workflows/directory_workflow.yml
@@ -0,0 +1,40 @@
+name: Directory/Filename Formatter workflow
+on: [push, pull_request]
+
+jobs:
+ main:
+ name: (Directory) Formatter
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Setup Git configuration
+ run: |
+ git config --global user.name 'autoprettier'
+ git config --global user.email 'actions@github.com'
+ git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+ - name: Filename Formatter
+ run: |
+ IFS=$'\n'
+          for fname in `find . -type f \( -name '*.R' -o -name '*.r' \)`
+ do
+ echo "${fname}"
+ new_fname=`echo ${fname} | tr ' ' '_'`
+ echo " ${new_fname}"
+ new_fname=`echo ${new_fname} | tr 'A-Z' 'a-z'`
+ echo " ${new_fname}"
+ new_fname=`echo ${new_fname} | tr '-' '_'`
+ echo " ${new_fname}"
+ if [ ${fname} != ${new_fname} ]
+ then
+ echo " ${fname} --> ${new_fname}"
+ git "mv" "${fname}" ${new_fname}
+ fi
+ done
+ git commit -am "Formatting filenames ${GITHUB_SHA::8}" || true
+ - name: Update DIRECTORY.md
+ run: |
+ wget https://raw.githubusercontent.com/TheAlgorithms/scripts/main/build_directory_md.py
+ python3 build_directory_md.py R . .R,.r > DIRECTORY.md
+ git diff
+ git commit -m "Update DIRECTORY.md" DIRECTORY.md || true
+ git push --force origin HEAD:$GITHUB_REF || true
diff --git a/Desktop/open-source/R/.github/workflows/documentation_workflow.yml b/Desktop/open-source/R/.github/workflows/documentation_workflow.yml
new file mode 100644
index 00000000..3762b445
--- /dev/null
+++ b/Desktop/open-source/R/.github/workflows/documentation_workflow.yml
@@ -0,0 +1,36 @@
+name: Documentation
+on: [push, pull_request]
+
+jobs:
+ MakeDocs:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Setup R
+ uses: r-lib/actions/setup-r@v2-branch
+ with:
+ r-version: '4.1.0'
+ - name: Create Documentation directory
+ run: |
+ echo "Creating 'Documentation'"
+ mkdir -p documentation
+ - name: Install dependencies
+ run: |
+ echo "Installing R package dependencies. Scripts might have additional dependencies installed."
+ Rscript -e 'if (!require(knitr)) install.packages("knitr")'
+ Rscript -e 'if (!require(markdown)) install.packages("markdown")'
+ - name: Remove old documentation
+ run: 'rm -rf documentation/*'
+ - name: Generate new documentation
+ run: 'Rscript .github/scripts/doc_builder.r'
+ - name: Commit Documentation
+ run: |
+ git diff-index --quiet HEAD && exit
+ echo "Setting up Git to push changes."
+ git config --global user.name 'autoprettier'
+ git config --global user.email 'actions@github.com'
+ git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
+ echo "Staging documentation"
+ git add documentation/ # This is the only directory that has changes and should be staged
+ git commit -m "Update documentation" || true
+ git push || true
diff --git a/Desktop/open-source/R/.github/workflows/stale.yml b/Desktop/open-source/R/.github/workflows/stale.yml
new file mode 100644
index 00000000..95bd0935
--- /dev/null
+++ b/Desktop/open-source/R/.github/workflows/stale.yml
@@ -0,0 +1,18 @@
+name: 'Close stale issues and PRs'
+on:
+ schedule:
+ - cron: '0 0 * * *'
+jobs:
+ stale:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/stale@v4
+ with:
+ stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
+ close-issue-message: 'This issue was closed because it has been stalled for 7 days with no activity.'
+ stale-pr-message: 'This PR is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 7 days.'
+ close-pr-message: 'This PR was closed because it has been stalled for 7 days with no activity.'
+ exempt-issue-labels: 'dont-close'
+ exempt-pr-labels: 'dont-close'
+ days-before-stale: 30
+ days-before-close: 7
diff --git a/Desktop/open-source/R/.gitpod.Dockerfile b/Desktop/open-source/R/.gitpod.Dockerfile
new file mode 100644
index 00000000..a4e549e2
--- /dev/null
+++ b/Desktop/open-source/R/.gitpod.Dockerfile
@@ -0,0 +1,3 @@
+FROM gitpod/workspace-full
+
+RUN brew install R
\ No newline at end of file
diff --git a/Desktop/open-source/R/.gitpod.yml b/Desktop/open-source/R/.gitpod.yml
new file mode 100644
index 00000000..4e222489
--- /dev/null
+++ b/Desktop/open-source/R/.gitpod.yml
@@ -0,0 +1,2 @@
+image:
+ file: .gitpod.Dockerfile
\ No newline at end of file
diff --git a/Desktop/open-source/R/DIRECTORY.md b/Desktop/open-source/R/DIRECTORY.md
new file mode 100644
index 00000000..0a0560e5
--- /dev/null
+++ b/Desktop/open-source/R/DIRECTORY.md
@@ -0,0 +1,151 @@
+
+## Association Algorithms
+ * [Apriori](https://github.com/TheAlgorithms/R/blob/HEAD/association_algorithms/apriori.r)
+
+## Classification Algorithms
+ * [Decision Tree](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/decision_tree.r)
+ * [Gradient Boosting Algorithms](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/gradient_boosting_algorithms.r)
+ * [Knn](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/knn.r)
+ * [Light Gbm](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/light_gbm.r)
+ * [Logistic Regression](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/logistic_regression.r)
+ * [Logistic Regression 2](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/logistic_regression_2.r)
+ * [Naive Bayes](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/naive_bayes.r)
+ * [Random Forest](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/random_forest.r)
+ * [Svm](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/svm.r)
+ * [Xgboost](https://github.com/TheAlgorithms/R/blob/HEAD/classification_algorithms/xgboost.r)
+
+## Clustering Algorithms
+ * [Dbscan Clustering](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/dbscan_clustering.r)
+ * [Gmm](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/gmm.r)
+ * [Heirarchical Clustering](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/heirarchical_clustering.r)
+ * [K Means](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/k_means.r)
+ * [Kmeans Clustering](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/kmeans_clustering.r)
+ * [Kmeans Raw R](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/kmeans_raw_r.r)
+ * [Pam](https://github.com/TheAlgorithms/R/blob/HEAD/clustering_algorithms/pam.r)
+
+## Data Manipulation
+ * [Label Encode](https://github.com/TheAlgorithms/R/blob/HEAD/data_manipulation/label_encode.r)
+ * [One Hot Encode](https://github.com/TheAlgorithms/R/blob/HEAD/data_manipulation/one_hot_encode.r)
+ * [Shorten.Vector](https://github.com/TheAlgorithms/R/blob/HEAD/data_manipulation/shorten.vector.r)
+
+## Data Preprocessing
+ * [Data Normalization Standardization](https://github.com/TheAlgorithms/R/blob/HEAD/data_preprocessing/data_normalization_standardization.r)
+ * [Data Processing](https://github.com/TheAlgorithms/R/blob/HEAD/data_preprocessing/data_processing.r)
+ * [Dimensionality Reduction Algorithms](https://github.com/TheAlgorithms/R/blob/HEAD/data_preprocessing/dimensionality_reduction_algorithms.r)
+ * [K Folds](https://github.com/TheAlgorithms/R/blob/HEAD/data_preprocessing/k_folds.r)
+ * [Lasso](https://github.com/TheAlgorithms/R/blob/HEAD/data_preprocessing/lasso.r)
+
+## Data Structures
+ * [Binary Search Tree](https://github.com/TheAlgorithms/R/blob/HEAD/data_structures/binary_search_tree.r)
+
+## Dynamic Programming
+ * 0
+ * 0
+ * [1 Knapsack Problem](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/0/0/1_knapsack_problem.r)
+ * [1 Knapsack Problem](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/0/1_knapsack_problem.r)
+ * [Coin Change](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/coin_change.r)
+ * [Longest Common Subsequence](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/longest_common_subsequence.r)
+ * [Longest Increasing Subsequence](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/longest_increasing_subsequence.r)
+ * [Matrix Chain Multiplication](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/matrix_chain_multiplication.r)
+ * [Minimum Path Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/minimum_path_sum.r)
+ * [Subset Sum](https://github.com/TheAlgorithms/R/blob/HEAD/dynamic_programming/subset_sum.r)
+
+## Graph Algorithms
+ * [Bellman Ford Shortest Path](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/bellman_ford_shortest_path.r)
+ * [Breadth First Search](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/breadth_first_search.r)
+ * [Bridge Detector](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/bridge_detector.r)
+ * [Depth First Search](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/depth_first_search.r)
+ * [Dijkstra Shortest Path](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/dijkstra_shortest_path.r)
+ * [Floyd Warshall](https://github.com/TheAlgorithms/R/blob/HEAD/graph_algorithms/floyd_warshall.r)
+
+## Machine Learning
+ * [Gradient Boosting](https://github.com/TheAlgorithms/R/blob/HEAD/machine_learning/gradient_boosting.r)
+
+## Mathematics
+ * [Amicable Numbers](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/amicable_numbers.r)
+ * [Armstrong Number](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/armstrong_number.r)
+ * [Bisection Method](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/bisection_method.r)
+ * [Catalan Numbers](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/catalan_numbers.r)
+ * [Euclidean Distance](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/euclidean_distance.r)
+ * [Extended Euclidean Algorithm](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/extended_euclidean_algorithm.r)
+ * [Factorial](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/factorial.r)
+ * [Fibonacci](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/fibonacci.r)
+ * [First N Fibonacci](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/first_n_fibonacci.r)
+ * [Greatest Common Divisor](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/greatest_common_divisor.r)
+ * [Josephus Problem](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/josephus_problem.r)
+ * [Least Common Multiple](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/least_common_multiple.r)
+ * [Modular Exponentiation](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/modular_exponentiation.r)
+ * [Newton Raphson Method](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/newton_raphson_method.r)
+ * [Perfect Number](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/perfect_number.r)
+ * [Perfect Square](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/perfect_square.r)
+ * [Permutation Calculation](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/permutation_calculation.r)
+ * [Pi Monte Carlo](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/pi_monte_carlo.r)
+ * [Prime](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/prime.r)
+ * [Sieve Of Eratosthenes](https://github.com/TheAlgorithms/R/blob/HEAD/mathematics/sieve_of_eratosthenes.r)
+
+## Quantitative Finance
+ * [Black Scholes Option Pricing](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/black_scholes_option_pricing.r)
+ * [Kalman Filter](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/kalman_filter.r)
+ * [Markowitz Portfolio Optimization](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/markowitz_portfolio_optimization.r)
+ * [Monte Carlo Simulation](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/monte_carlo_simulation.r)
+ * [Risk Metrics](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/risk_metrics.r)
+ * [Time Series Analyzer](https://github.com/TheAlgorithms/R/blob/HEAD/quantitative_finance/time_series_analyzer.r)
+
+## Regression Algorithms
+ * [Ann](https://github.com/TheAlgorithms/R/blob/HEAD/regression_algorithms/ann.r)
+ * [Anova Oneway](https://github.com/TheAlgorithms/R/blob/HEAD/regression_algorithms/anova_oneway.r)
+ * [Linear Regression](https://github.com/TheAlgorithms/R/blob/HEAD/regression_algorithms/linear_regression.r)
+ * [Linearregressionrawr](https://github.com/TheAlgorithms/R/blob/HEAD/regression_algorithms/linearregressionrawr.r)
+ * [Multiple Linear Regression](https://github.com/TheAlgorithms/R/blob/HEAD/regression_algorithms/multiple_linear_regression.r)
+
+## Searches
+ * [Binary Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/binary_search.r)
+ * [Jump Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/jump_search.r)
+ * [Linear Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/linear_search.r)
+ * [Rabin.Karp.String.Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/rabin.karp.string.search.r)
+ * [Ternary.Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/ternary.search.r)
+ * [Z.Algorithm.Search](https://github.com/TheAlgorithms/R/blob/HEAD/searches/z.algorithm.search.r)
+
+## Sorting Algorithms
+ * [Binary Insertion Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/binary_insertion_sort.r)
+ * [Bubble Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/bubble_sort.r)
+ * [Bucket Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/bucket_sort.r)
+ * [Cocktail Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/cocktail_sort.r)
+ * [Comb Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/comb_sort.r)
+ * [Counting Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/counting_sort.r)
+ * [Cycle Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/cycle_sort.r)
+ * [Gnome Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/gnome_sort.r)
+ * [Heap Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/heap_sort.r)
+ * [Insertion Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/insertion_sort.r)
+ * [Merge Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/merge_sort.r)
+ * [Odd Even Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/odd_even_sort.r)
+ * [Pancake Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/pancake_sort.r)
+ * [Patience Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/patience_sort.r)
+ * [Quick Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/quick_sort.r)
+ * [Radix Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/radix_sort.r)
+ * [Selection Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/selection_sort.r)
+ * [Shell Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/shell_sort.r)
+ * [Stooge Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/stooge_sort.r)
+ * [Strand Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/strand_sort.r)
+ * [Tim Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/tim_sort.r)
+ * [Topological Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/topological_sort.r)
+ * [Wiggle Sort](https://github.com/TheAlgorithms/R/blob/HEAD/sorting_algorithms/wiggle_sort.r)
+
+## String Manipulation
+ * [Burrows](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/burrows.r)
+ * [Findpalindrome](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/findpalindrome.r)
+ * [Hamming Distance](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/hamming_distance.r)
+ * [Is.Anagram](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/is.anagram.r)
+ * [Is.Lower](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/is.lower.r)
+ * [Is.Uppercase](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/is.uppercase.r)
+ * [Kmp String Matching](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/kmp_string_matching.r)
+ * [Levenshtein](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/levenshtein.r)
+ * [Longest.Palindromic.Subsequence](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/longest.palindromic.subsequence.r)
+ * [Longest.Substring.No.Repeat](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/longest.substring.no.repeat.r)
+ * [Manacher.Longest.Palindrome](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/manacher.longest.palindrome.r)
+ * [Maskwords](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/maskwords.r)
+ * [Min.Palindromic.Insert](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/min.palindromic.insert.r)
+ * [Minimum.Window.Substring](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/minimum.window.substring.r)
+ * [Rearrangeways](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/rearrangeways.r)
+ * [Shortest.Common.Supersequence](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/shortest.common.supersequence.r)
+ * [Unique.Letters.Count](https://github.com/TheAlgorithms/R/blob/HEAD/string_manipulation/unique.letters.count.r)
diff --git a/Desktop/open-source/R/LICENSE b/Desktop/open-source/R/LICENSE
new file mode 100644
index 00000000..6d7f310d
--- /dev/null
+++ b/Desktop/open-source/R/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 The Algorithms
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Desktop/open-source/R/README.md b/Desktop/open-source/R/README.md
new file mode 100644
index 00000000..efe4947b
--- /dev/null
+++ b/Desktop/open-source/R/README.md
@@ -0,0 +1,70 @@
+# The Algorithms - R
+
+*R is a programming language and free software environment for statistical computing and graphics supported by the R Foundation for Statistical Computing. The R language is widely used among statisticians and data miners for developing statistical software and data analysis. Polls, data mining surveys and studies of scholarly literature databases show substantial increases in popularity in recent years. As of November 2019, R ranks 16th in the TIOBE index, a measure of popularity of programming languages. ([Wikipedia](https://en.wikipedia.org/wiki/R_(programming_language)))*
+
+All algorithms can be found in the [`DIRECTORY.md`](https://github.com/TheAlgorithms/R/blob/master/DIRECTORY.md) file ordered by folder.
+
+## Contribution Guidelines
+
+Please follow the points below if you would like to contribute:
+
+### 1. Code Quality & Functionality
+
+- Ensure that your code is functional and well-structured before submitting a pull request.
+- The code should run without errors in an R environment and produce the expected output.
+- Follow best practices for efficiency, readability, and maintainability.
+
+### 2. Adding a New Algorithm
+
+- Verify that the algorithm is not already implemented in the repository.
+- Include a brief explanation of the algorithm in the file as comments.
+- Add an example showcasing its usage (it can be commented within the script).
+
+### 3. Modifying an Existing Algorithm
+
+- If making improvements, clearly document the changes in your pull request description.
+- Ensure that your modifications do not break existing functionality.
+- If applicable, update or add test cases to validate your changes.
+
+### 4. Code Style & Naming Conventions
+
+- Use consistent and meaningful variable names, using `.` or `_` to separate words (e.g., `results.df` for a data frame).
+- Ensure that filenames follow the existing directory structure and naming patterns.
+
+### 5. Documentation & Comments
+
+- Provide clear and concise documentation in the form of comments within the code.
+- Add a brief docstring at the beginning of the script explaining (see the example below):
+  - What the algorithm does
+  - The expected input and output
+  - Any dependencies required
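+
+A sketch of what such a header can look like (the algorithm shown is purely illustrative and not tied to any existing file):
+
+```r
+# Linear Search
+# Scans a vector from left to right and returns the index of the first element
+# equal to the target, or -1 if the target is not present.
+# Input:  a vector and a target value
+# Output: an integer index (or -1)
+# Dependencies: none (base R only)
+linear.search <- function(vec, target) {
+  for (i in seq_along(vec)) {
+    if (vec[i] == target) {
+      return(i)
+    }
+  }
+  return(-1)
+}
+
+# Example usage (kept as a comment):
+# linear.search(c(4, 8, 15, 16), 15)  # returns 3
+```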
+
+### 6. Testing & Verification
+
+Before submitting a pull request, verify that your code:
+
+- Runs correctly with different test cases
+- Does not produce unnecessary warnings or errors
+
+If applicable, also add a test file demonstrating the algorithm's correctness.
diff --git a/Desktop/open-source/R/association_algorithms/apriori.r b/Desktop/open-source/R/association_algorithms/apriori.r
new file mode 100644
index 00000000..b770864e
--- /dev/null
+++ b/Desktop/open-source/R/association_algorithms/apriori.r
@@ -0,0 +1,10 @@
+library(arules)
+library(arulesViz) # needed for plotly_arules() below
+groceries <- read.transactions("groceries.csv", sep=",")
+summary(groceries)
+itemFrequencyPlot(groceries, topN=20)
+
+# sample() for randomly extracting transactions, image() for visualising the sparse matrix
+image(sample(groceries,100))
+groceries_rule <- apriori(data=groceries, parameter=list(support=0.006, confidence=0.25, minlen=2))
+plotly_arules(groceries_rule)
+summary(groceries_rule)
diff --git a/Desktop/open-source/R/classification_algorithms/decision_tree.r b/Desktop/open-source/R/classification_algorithms/decision_tree.r
new file mode 100644
index 00000000..c7042ad0
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/decision_tree.r
@@ -0,0 +1,7 @@
+library(rpart)
+x <- cbind(x_train,y_train)
+# grow tree
+fit <- rpart(y_train ~ ., data = x,method="class")
+summary(fit)
+# Predict Output
+predicted= predict(fit,x_test)
diff --git a/Desktop/open-source/R/classification_algorithms/gradient_boosting_algorithms.r b/Desktop/open-source/R/classification_algorithms/gradient_boosting_algorithms.r
new file mode 100644
index 00000000..c0966175
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/gradient_boosting_algorithms.r
@@ -0,0 +1,71 @@
+# GBM
+library(caret)
+x <- cbind(x_train,y_train)
+# Fitting model
+fitControl <- trainControl( method = "repeatedcv", number = 4, repeats = 4)
+fit <- train(y_train ~ ., data = x, method = "gbm", trControl = fitControl, verbose = FALSE)
+predicted= predict(fit,x_test,type= "prob")[,2]
+
+
+
+# XGBoost
+require(caret)
+x <- cbind(x_train,y_train)
+# Fitting model
+TrainControl <- trainControl( method = "repeatedcv", number = 10, repeats = 4)
+model <- train(y_train ~ ., data = x, method = "xgbLinear", trControl = TrainControl, verbose = FALSE)
+# OR
+model <- train(y_train ~ ., data = x, method = "xgbTree", trControl = TrainControl, verbose = FALSE)
+predicted <- predict(model, x_test)
+
+
+
+# LightGBM
+library(RLightGBM)
+data(example.binary)
+# Parameters
+num_iterations <- 100
+config <- list(objective = "binary", metric="binary_logloss,auc", learning_rate = 0.1, num_leaves = 63, tree_learner = "serial", feature_fraction = 0.8, bagging_freq = 5, bagging_fraction = 0.8, min_data_in_leaf = 50, min_sum_hessian_in_leaf = 5.0)
+# Create data handle and booster
+handle.data <- lgbm.data.create(x)
+lgbm.data.setField(handle.data, "label", y)
+handle.booster <- lgbm.booster.create(handle.data, lapply(config, as.character))
+# Train for num_iterations iterations and eval every 5 steps
+lgbm.booster.train(handle.booster, num_iterations, 5)
+# Predict
+pred <- lgbm.booster.predict(handle.booster, x.test)
+# Test accuracy
+sum(y.test == (pred > 0.5)) / length(y.test)
+# Save model (can be loaded again via lgbm.booster.load(filename))
+lgbm.booster.save(handle.booster, filename = "/tmp/model.txt")
+
+
+
+# Catboost
+set.seed(1)
+
+require(titanic)
+
+require(caret)
+
+require(catboost)
+
+tt <- titanic::titanic_train[complete.cases(titanic::titanic_train),]
+
+data <- as.data.frame(as.matrix(tt), stringsAsFactors = TRUE)
+
+drop_columns = c("PassengerId", "Survived", "Name", "Ticket", "Cabin")
+
+x <- data[,!(names(data) %in% drop_columns)]
+
+y <- data[,c("Survived")]
+
+fit_control <- trainControl(method = "cv", number = 4,classProbs = TRUE)
+
+grid <- expand.grid(depth = c(4, 6, 8),learning_rate = 0.1,iterations = 100, l2_leaf_reg = 1e-3, rsm = 0.95, border_count = 64)
+
+report <- train(x, as.factor(make.names(y)),method = catboost.caret,verbose = TRUE, preProc = NULL,tuneGrid = grid, trControl = fit_control)
+
+print(report)
+
+importance <- varImp(report, scale = FALSE)
+
+print(importance)
diff --git a/Desktop/open-source/R/classification_algorithms/knn.r b/Desktop/open-source/R/classification_algorithms/knn.r
new file mode 100644
index 00000000..afd79f6c
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/knn.r
@@ -0,0 +1,7 @@
+# k-nearest neighbours: knn() lives in the 'class' package (there is no 'knn' package)
+library(class)
+# x_train / x_test are feature matrices and y_train is the vector of training labels;
+# class::knn() classifies the test set directly, so no separate predict() step is needed
+predicted <- knn(train = x_train, test = x_test, cl = y_train, k = 5)
+summary(predicted)
diff --git a/Desktop/open-source/R/classification_algorithms/light_gbm.r b/Desktop/open-source/R/classification_algorithms/light_gbm.r
new file mode 100644
index 00000000..993ba931
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/light_gbm.r
@@ -0,0 +1,26 @@
+library(RLightGBM)
+data(example.binary)
+#Parameters
+
+num_iterations <- 100
+config <- list(objective = "binary", metric="binary_logloss,auc", learning_rate = 0.1, num_leaves = 63, tree_learner = "serial", feature_fraction = 0.8, bagging_freq = 5, bagging_fraction = 0.8, min_data_in_leaf = 50, min_sum_hessian_in_leaf = 5.0)
+
+#Create data handle and booster
+handle.data <- lgbm.data.create(x)
+
+lgbm.data.setField(handle.data, "label", y)
+
+handle.booster <- lgbm.booster.create(handle.data, lapply(config, as.character))
+
+#Train for num_iterations iterations and eval every 5 steps
+
+lgbm.booster.train(handle.booster, num_iterations, 5)
+
+#Predict
+pred <- lgbm.booster.predict(handle.booster, x.test)
+
+#Test accuracy
+sum(y.test == (pred > 0.5)) / length(y.test)
+
+#Save model (can be loaded again via lgbm.booster.load(filename))
+lgbm.booster.save(handle.booster, filename = "/tmp/model.txt")
diff --git a/Desktop/open-source/R/classification_algorithms/logistic_regression.r b/Desktop/open-source/R/classification_algorithms/logistic_regression.r
new file mode 100644
index 00000000..1e946846
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/logistic_regression.r
@@ -0,0 +1,6 @@
+x <- cbind(x_train,y_train)
+# Train the model using the training sets and check score
+logistic <- glm(y_train ~ ., data = x,family='binomial')
+summary(logistic)
+# Predict Output
+predicted= predict(logistic,x_test)
diff --git a/Desktop/open-source/R/classification_algorithms/logistic_regression_2.r b/Desktop/open-source/R/classification_algorithms/logistic_regression_2.r
new file mode 100644
index 00000000..5060e7d4
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/logistic_regression_2.r
@@ -0,0 +1,10 @@
+# Introduction to logistic regression
+
+# glm stands for Generalized Linear Model
+mod1 <- glm(y_data~x_data, data=name_of_the_dataframe, family="binomial")
+
+# displays the output of the model computed by the previous line
+summary(mod1)
+
+# modeled data : it predicts the output for x_test_data as input information for the model
+predicted <- predict(mod1, x_test_data)
diff --git a/Desktop/open-source/R/classification_algorithms/naive_bayes.r b/Desktop/open-source/R/classification_algorithms/naive_bayes.r
new file mode 100644
index 00000000..c160722b
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/naive_bayes.r
@@ -0,0 +1,7 @@
+library(e1071)
+x <- cbind(x_train,y_train)
+# Fitting model
+fit <-naiveBayes(y_train ~ ., data = x)
+summary(fit)
+# Predict Output
+predicted= predict(fit,x_test)
diff --git a/Desktop/open-source/R/classification_algorithms/random_forest.r b/Desktop/open-source/R/classification_algorithms/random_forest.r
new file mode 100644
index 00000000..e45d43ae
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/random_forest.r
@@ -0,0 +1,7 @@
+library(randomForest)
+x <- cbind(x_train,y_train)
+# Fitting model
+fit <- randomForest(y_train ~ ., data = x, ntree = 500)
+summary(fit)
+# Predict Output
+predicted= predict(fit,x_test)
diff --git a/Desktop/open-source/R/classification_algorithms/svm.r b/Desktop/open-source/R/classification_algorithms/svm.r
new file mode 100644
index 00000000..9cc4b5b2
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/svm.r
@@ -0,0 +1,7 @@
+library(e1071)
+x <- cbind(x_train,y_train)
+# Fitting model
+fit <-svm(y_train ~ ., data = x)
+summary(fit)
+# Predict Output
+predicted= predict(fit,x_test)
diff --git a/Desktop/open-source/R/classification_algorithms/xgboost.r b/Desktop/open-source/R/classification_algorithms/xgboost.r
new file mode 100644
index 00000000..5e54ba57
--- /dev/null
+++ b/Desktop/open-source/R/classification_algorithms/xgboost.r
@@ -0,0 +1,19 @@
+library(tidyverse)
+library(xgboost)
+
+ind<-sample(2,nrow(diamonds),replace = T,prob = c(0.7,0.3))
+train.set<-diamonds[ind==1,]
+test.set<-diamonds[ind==2,]
+
+xgb.train<-bind_cols(select_if(train.set,is.numeric),model.matrix(~cut-1,train.set) %>% as_tibble(),model.matrix(~color-1,train.set) %>% as_tibble(),model.matrix(~clarity-1,train.set) %>% as_tibble())
+xgboost.train<-xgb.DMatrix(data = as.matrix(select(xgb.train,-price)),label=xgb.train$price)
+xgb.test<-bind_cols(select_if(test.set,is.numeric),model.matrix(~cut-1,test.set) %>% as_tibble(),model.matrix(~color-1,test.set) %>% as_tibble(),model.matrix(~clarity-1,test.set) %>% as_tibble())
+xgboost.test<-xgb.DMatrix(data = select(xgb.test,-price) %>% as.matrix(),label=xgb.test$price)
+
+param<-list(eval_metric='rmse',gamma=1,max_depth=6,nthread = 3)
+xg.model<-xgb.train(data = xgboost.train,params = param,watchlist = list(test=xgboost.test),nrounds = 500,early_stopping_rounds = 60,
+ print_every_n = 30)
+
+xg.predict<-predict(xg.model,xgboost.test)
+rmse.xgb<-sqrt(mean((test.set$price-xg.predict)^2)) # root-mean-squared error on the test set
+plot((test.set$price-xg.predict))
diff --git a/Desktop/open-source/R/clustering_algorithms/dbscan_clustering.r b/Desktop/open-source/R/clustering_algorithms/dbscan_clustering.r
new file mode 100644
index 00000000..b5e0ba2e
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/dbscan_clustering.r
@@ -0,0 +1,3 @@
+library(dbscan)
+cl <- dbscan(iris[,-5], eps = .5, minPts = 5)
+plot(iris[,-5], col = cl$cluster)
diff --git a/Desktop/open-source/R/clustering_algorithms/gmm.r b/Desktop/open-source/R/clustering_algorithms/gmm.r
new file mode 100644
index 00000000..696009ac
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/gmm.r
@@ -0,0 +1,4 @@
+library(mclust) # Gaussian mixture model (GMM)
+gmm_fit <- Mclust(iris[, 1:4]) # Fit a GMM model
+summary(gmm_fit) # Summary table
+plot(gmm_fit, 'BIC') # Select model based on BIC
diff --git a/Desktop/open-source/R/clustering_algorithms/heirarchical_clustering.r b/Desktop/open-source/R/clustering_algorithms/heirarchical_clustering.r
new file mode 100644
index 00000000..d6ea84cc
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/heirarchical_clustering.r
@@ -0,0 +1,3 @@
+set.seed(42)
+clusters <- hclust(dist(iris[, -5]))
+plot(clusters)
diff --git a/Desktop/open-source/R/clustering_algorithms/k_means.r b/Desktop/open-source/R/clustering_algorithms/k_means.r
new file mode 100644
index 00000000..735948e8
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/k_means.r
@@ -0,0 +1,3 @@
+library(cluster)
+set.seed(42)
+fit <- kmeans(X, 3) # 3 cluster solution
diff --git a/Desktop/open-source/R/clustering_algorithms/kmeans_clustering.r b/Desktop/open-source/R/clustering_algorithms/kmeans_clustering.r
new file mode 100644
index 00000000..2a19d2d4
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/kmeans_clustering.r
@@ -0,0 +1,4 @@
+set.seed(42)
+cl <- kmeans(iris[,-5], 3)
+plot(iris[,-5], col = cl$cluster)
+points(cl$centers, col = 1:3, pch = 8)
diff --git a/Desktop/open-source/R/clustering_algorithms/kmeans_raw_r.r b/Desktop/open-source/R/clustering_algorithms/kmeans_raw_r.r
new file mode 100644
index 00000000..1cb7d69e
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/kmeans_raw_r.r
@@ -0,0 +1,53 @@
+custonKmeans<-function(dataset=NA,k=NA){
+  if(all(is.na(dataset)) || is.na(k)){
+ stop("You must input valid parameters!")
+ }
+ Eudist<-function(x,y){
+ distance<-sqrt(sum((x-y)^2))
+ return (distance)
+ }
+
+ rows.dataset<-nrow(dataset)
+ continue.change=TRUE
+ initPoint<-dataset[sample.int(rows.dataset,size = k),]
+ formerPoint<-initPoint
+ iterPoint<-matrix(0,nrow = k,ncol = ncol(dataset))
+
+  # Record the distance from each point to each cluster
+ error.matrix<-matrix(0,nrow=rows.dataset,ncol=k)
+ while(continue.change){
+    # Record which cluster each point currently belongs to
+ cluster.matrix<-matrix(0,nrow=rows.dataset,ncol=k)
+    for(i in 1:rows.dataset){ # compute the distance from each point to each of the k current centers
+ for(j in 1:k){
+ error.matrix[i,j]<-Eudist(dataset[i,],formerPoint[j,])
+ }
+ }
+    # Assign each point to its nearest center
+ for(i in 1:rows.dataset){
+ cluster.matrix[i,which.min(error.matrix[i,])]<-1
+ }
+
+    # Update the centroid positions
+ for(i in 1:k){
+      iterPoint[i,]<-apply(dataset[which(cluster.matrix[,i] == 1),,drop=FALSE],2,"mean")
+ }
+    # Stop once none of the centers moved in this iteration
+    all.true<-logical(k)
+    for(i in 1:k){
+      all.true[i]<-all(formerPoint[i,] == iterPoint[i,])
+    }
+    formerPoint<-iterPoint
+    continue.change<-!all(all.true)
+ }
+ colnames(iterPoint)<-colnames(dataset)
+ out=list()
+ out[["centers"]]<-iterPoint
+ out[["distance"]]<-error.matrix
+ out[["cluster"]]<-rep(1,rows.dataset)
+ for(i in 1:rows.dataset){
+ out[["cluster"]][i]<-which(cluster.matrix[i,] == 1)
+ }
+ return(out)
+}
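+
+# Example usage (illustrative): cluster the iris measurements into 3 groups and
+# compare the cluster sizes with the reference implementation in stats::kmeans().
+# set.seed(42)
+# res <- custonKmeans(as.matrix(iris[, -5]), k = 3)
+# table(res$cluster)
+# table(kmeans(as.matrix(iris[, -5]), centers = 3)$cluster)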
diff --git a/Desktop/open-source/R/clustering_algorithms/pam.r b/Desktop/open-source/R/clustering_algorithms/pam.r
new file mode 100644
index 00000000..b2148e75
--- /dev/null
+++ b/Desktop/open-source/R/clustering_algorithms/pam.r
@@ -0,0 +1,3 @@
+library(cluster)
+pam_fit <- pam(iris[, 1:4], 5) # Partition Around Medoids
+summary(pam_fit) # Get summary
diff --git a/Desktop/open-source/R/data_manipulation/label_encode.r b/Desktop/open-source/R/data_manipulation/label_encode.r
new file mode 100644
index 00000000..1a2cdb48
--- /dev/null
+++ b/Desktop/open-source/R/data_manipulation/label_encode.r
@@ -0,0 +1,31 @@
+library(tidyverse)
+library(data.table) # the := assignments below use data.table syntax
+#Divide data into train and test in 70% and 30%
+ind<-sample(2,nrow(diamonds),replace = T,prob = c(0.7,0.3))
+train.set <- diamonds[ind==1,]
+test.set <- diamonds[ind==2,]
+
+#Combine the dataset using rbind function(inbuilt function)
+combi <- rbind(train.set, test.set)
+setDT(combi) # convert to a data.table so that := works
+
+##Label Encoding
+combi[, cut_num := ifelse(cut == "Fair",0,
+ ifelse(cut == "Good",1,
+ ifelse(cut == "Very Good",2,
+ ifelse(cut == "Premium",3,4))))]
+combi[, color_num := ifelse(color == "D",0,
+ ifelse(color == "E",2,
+ ifelse(color == "F",3,
+ ifelse(color == "G",4,
+ ifelse(color == "H",5,
+ ifelse(color == "I",6,7))))))]
+
+# Column "clarity" won't be taken in label encoding as it contains more variables.
+#The more variables in column in label encoding, the model will perform less.
+
+#Removing categorical variables after label encoding
+combi[,c("color", "cut") := NULL)
+
+#Divide data back into train and test in 70% and 30%
+ind<-sample(2,nrow(combi),replace = T,prob = c(0.7,0.3))
+train.set <- combi[ind==1,]
+test.set <- combi[ind==2,]
diff --git a/Desktop/open-source/R/data_manipulation/one_hot_encode.r b/Desktop/open-source/R/data_manipulation/one_hot_encode.r
new file mode 100644
index 00000000..f5a59b49
--- /dev/null
+++ b/Desktop/open-source/R/data_manipulation/one_hot_encode.r
@@ -0,0 +1,27 @@
+oneHotEncode <- function(x, fullRank = T){
+ if(fullRank){
+ return(model.matrix(~ 0 + ., data = x))
+ } else {
+ charCols <- colnames(x)[sapply(x, is.character)]
+ if(length(charCols) > 0){
+ for(col in charCols){
+ x[[eval(col)]] <- factor(x[[eval(col)]])
+ }
+ }
+ factorCols <- colnames(x)[sapply(x, is.factor)]
+ contrastsList <- vector(mode = "list", length = length(factorCols))
+ names(contrastsList) <- factorCols
+ if(length(factorCols) > 0){
+ for(col in factorCols){
+ contrastsList[[eval(col)]] <- contrasts(x[[eval(col)]], contrasts = F)
+ }
+ return(model.matrix(~ 0 + ., data = x, contrasts = contrastsList))
+ } else {
+ return(model.matrix(~ 0 + ., data = x))
+ }
+ }
+}
+
+diamonds <- ggplot2::diamonds
+head(oneHotEncode(diamonds))
+head(oneHotEncode(diamonds, fullRank = F))
diff --git a/Desktop/open-source/R/data_manipulation/shorten.vector.r b/Desktop/open-source/R/data_manipulation/shorten.vector.r
new file mode 100644
index 00000000..00460f2c
--- /dev/null
+++ b/Desktop/open-source/R/data_manipulation/shorten.vector.r
@@ -0,0 +1,23 @@
+shorten.vector <- function(vector,by){
+ # get last elements
+ vec_new <- vector |> tail(by)
+
+ # get index of last elements
+ index <- c()
+ for(i in vec_new){
+ values <- which(vector == i)
+ index <- c(index,values)
+ }
+
+ # delete index from vector
+ final_vec <- vector[-c(index)]
+
+ # return final output
+ return(final_vec)
+
+
+}
+
+fruits <- c("Pear","Banana","Peach","Grape","Apple","Orange")
+
+shorten.vector(fruits,by = 1)
diff --git a/Desktop/open-source/R/data_mining/.gitignore b/Desktop/open-source/R/data_mining/.gitignore
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/Desktop/open-source/R/data_mining/.gitignore
@@ -0,0 +1 @@
+
diff --git a/Desktop/open-source/R/data_mining/README.md b/Desktop/open-source/R/data_mining/README.md
new file mode 100644
index 00000000..c3e99506
--- /dev/null
+++ b/Desktop/open-source/R/data_mining/README.md
@@ -0,0 +1,5 @@
+# Data Mining in R
+
+## Tutorials
+
+ - [Top 10 data mining algorithms in plain R](https://hackerbits.com/data/top-10-data-mining-algorithms-in-plain-r/)
diff --git a/Desktop/open-source/R/data_preprocessing/data_normalization_standardization.r b/Desktop/open-source/R/data_preprocessing/data_normalization_standardization.r
new file mode 100644
index 00000000..5ae62bcf
--- /dev/null
+++ b/Desktop/open-source/R/data_preprocessing/data_normalization_standardization.r
@@ -0,0 +1,49 @@
+# normalization & standardization
+normalization<-function(x){
+ return((x-min(x))/(max(x)-min(x)))
+}
+
+standardization<-function(x){
+ return((x-mean(x))/sd(x))
+}
+
+head(iris)
+# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+# 1 5.1 3.5 1.4 0.2 setosa
+# 2 4.9 3.0 1.4 0.2 setosa
+# 3 4.7 3.2 1.3 0.2 setosa
+# 4 4.6 3.1 1.5 0.2 setosa
+# 5 5.0 3.6 1.4 0.2 setosa
+# 6 5.4 3.9 1.7 0.4 setosa
+
+iris<-iris[,-5]
+head(iris)
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# 1 5.1 3.5 1.4 0.2
+# 2 4.9 3.0 1.4 0.2
+# 3 4.7 3.2 1.3 0.2
+# 4 4.6 3.1 1.5 0.2
+# 5 5.0 3.6 1.4 0.2
+# 6 5.4 3.9 1.7 0.4
+
+#normalize
+apply(as.matrix(iris),2,normalization)
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# [1,] 0.22222222 0.62500000 0.06779661 0.04166667
+# [2,] 0.16666667 0.41666667 0.06779661 0.04166667
+# [3,] 0.11111111 0.50000000 0.05084746 0.04166667
+# [4,] 0.08333333 0.45833333 0.08474576 0.04166667
+# [5,] 0.19444444 0.66666667 0.06779661 0.04166667
+# [6,] 0.30555556 0.79166667 0.11864407 0.12500000
+# [7,] 0.08333333 0.58333333 0.06779661 0.08333333
+
+#standardize
+apply(as.matrix(iris),2,standardization)
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# [1,] -0.89767388 1.01560199 -1.33575163 -1.3110521482
+# [2,] -1.13920048 -0.13153881 -1.33575163 -1.3110521482
+# [3,] -1.38072709 0.32731751 -1.39239929 -1.3110521482
+# [4,] -1.50149039 0.09788935 -1.27910398 -1.3110521482
+# [5,] -1.01843718 1.24503015 -1.33575163 -1.3110521482
+# [6,] -0.53538397 1.93331463 -1.16580868 -1.0486667950
+# [7,] -1.50149039 0.78617383 -1.33575163 -1.1798594716
\ No newline at end of file
diff --git a/Desktop/open-source/R/data_preprocessing/data_processing.r b/Desktop/open-source/R/data_preprocessing/data_processing.r
new file mode 100644
index 00000000..23e71b0d
--- /dev/null
+++ b/Desktop/open-source/R/data_preprocessing/data_processing.r
@@ -0,0 +1,134 @@
+library(xlsx)
+## Loading required package: rJava
+## Loading required package: xlsxjars
+
+setwd("/Users/chenfeiyang")
+cameraData <- read.xlsx("./data/cameras.xlsx", sheetIndex = 1, header = TRUE)
+cameraData <- read.xlsx("./data/cameras.xlsx", "Baltimore Fixed Speed Cameras",
+ header = TRUE)
+head(cameraData)
+## address direction street crossStreet
+## 1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
+## 2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
+## 3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
+## 4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
+## 5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
+## 6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
+## intersection Location.1
+## 1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
+## 2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
+## 3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
+## 4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
+## 5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
+## 6 Erdman & Macon St (39.3068045671, -76.5593167803)
+
+# Read specific rows and columns in Excel
+colIndex <- 2:3
+rowIndex <- 1:4
+cameraDataSubset <- read.xlsx("./data/cameras.xlsx", sheetIndex = 1, colIndex = colIndex,
+ rowIndex = rowIndex)
+cameraDataSubset
+## direction street
+## 1 N/B Caton Ave
+## 2 S/B Caton Ave
+## 3 E/B Wilkens Ave
+
+# Subsetting - quick review
+set.seed(13435)
+X <- data.frame(var1 = sample(1:5), var2 = sample(6:10), var3 = sample(11:15))
+X <- X[sample(1:5), ]
+X$var2[c(1, 3)] = NA
+X
+## var1 var2 var3
+## 1 2 NA 15
+## 4 1 10 11
+## 2 3 NA 12
+## 3 5 6 14
+## 5 4 9 13
+
+X[, 1]
+## [1] 2 1 3 5 4
+X[, "var1"]
+## [1] 2 1 3 5 4
+X[1:2, "var2"]
+## [1] NA 10
+
+# Logicals and: & , or: |
+X[(X$var1 <= 3 & X$var3 > 11), ]
+## var1 var2 var3
+## 1 2 NA 15
+## 2 3 NA 12
+X[(X$var1 <= 3 | X$var3 > 15), ]
+## var1 var2 var3
+## 1 2 NA 15
+## 4 1 10 11
+## 2 3 NA 12
+
+## Dealing with missing values
+X[which(X$var2 > 8), ]
+## var1 var2 var3
+## 4 1 10 11
+## 5 4 9 13
+
+# Sorting
+sort(X$var1)
+## [1] 1 2 3 4 5
+sort(X$var1, decreasing = TRUE)
+## [1] 5 4 3 2 1
+sort(X$var2, na.last = TRUE)
+## [1] 6 9 10 NA NA
+
+# Ordering
+X[order(X$var1), ]
+## var1 var2 var3
+## 4 1 10 11
+## 1 2 NA 15
+## 2 3 NA 12
+## 5 4 9 13
+## 3 5 6 14
+
+X[order(X$var1, X$var3), ]
+## var1 var2 var3
+## 4 1 10 11
+## 1 2 NA 15
+## 2 3 NA 12
+## 5 4 9 13
+## 3 5 6 14
+
+## Sort using the arrange function of the plyr package
+
+library(plyr)
+arrange(X, var1)
+## var1 var2 var3
+## 1 1 10 11
+## 2 2 NA 15
+## 3 3 NA 12
+## 4 4 9 13
+## 5 5 6 14
+
+arrange(X, desc(var1))
+## var1 var2 var3
+## 1 5 6 14
+## 2 4 9 13
+## 3 3 NA 12
+## 4 2 NA 15
+## 5 1 10 11
+
+# Add row and column
+X$var4 <- rnorm(5)
+X
+## var1 var2 var3 var4
+## 1 2 NA 15 0.18760
+## 4 1 10 11 1.78698
+## 2 3 NA 12 0.49669
+## 3 5 6 14 0.06318
+## 5 4 9 13 -0.53613
+
+Y <- cbind(X, rnorm(5))
+Y
+## var1 var2 var3 var4 rnorm(5)
+## 1 2 NA 15 0.18760 0.62578
+## 4 1 10 11 1.78698 -2.45084
+## 2 3 NA 12 0.49669 0.08909
+## 3 5 6 14 0.06318 0.47839
+## 5 4 9 13 -0.53613 1.00053
diff --git a/Desktop/open-source/R/data_preprocessing/dimensionality_reduction_algorithms.r b/Desktop/open-source/R/data_preprocessing/dimensionality_reduction_algorithms.r
new file mode 100644
index 00000000..a4b6dbc1
--- /dev/null
+++ b/Desktop/open-source/R/data_preprocessing/dimensionality_reduction_algorithms.r
@@ -0,0 +1,4 @@
+library(stats)
+pca <- princomp(train, cor = TRUE)
+train_reduced <- predict(pca,train)
+test_reduced <- predict(pca,test)
diff --git a/Desktop/open-source/R/data_preprocessing/k_folds.r b/Desktop/open-source/R/data_preprocessing/k_folds.r
new file mode 100644
index 00000000..37ea54a8
--- /dev/null
+++ b/Desktop/open-source/R/data_preprocessing/k_folds.r
@@ -0,0 +1,29 @@
+# K folds cross validation is essential for machine learning
+# createFolds function in package caret is easy to use
+# here we write our own function
+
+get_k_folds<-function(y = c(),k = 10, isList = TRUE, seed = 123){
+ set.seed(seed)
+ folds<-sample(1:length(y), length(y))
+ every_n<-ceiling(length(y)/k)
+ matFolds<-suppressWarnings(matrix(folds, ncol=every_n, byrow = T))
+
+ if(isList){
+ value<-NULL
+ rownames(matFolds)<-paste("Folds",1:k,sep="")
+ value<-lapply(1:k, function(x){
+ if(x == k){
+ return(matFolds[x,][1:(length(y)-every_n*(k-1))])
+ }else{
+ return(matFolds[x,])
+ }
+ })
+ }else{
+ value<-c()
+ for(i in 1:length(y)){
+ value[i]<-ceiling(i/every_n)
+ }
+ }
+
+ return(value)
+}
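+
+# Example usage (illustrative): split the 150 iris rows into 5 folds and check
+# that every row is assigned to exactly one fold.
+# folds <- get_k_folds(y = iris$Species, k = 5)
+# sapply(folds, length)          # fold sizes (30 each here)
+# length(unique(unlist(folds)))  # 150: each row appears exactly once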
diff --git a/Desktop/open-source/R/data_preprocessing/lasso.r b/Desktop/open-source/R/data_preprocessing/lasso.r
new file mode 100644
index 00000000..01bbc89c
--- /dev/null
+++ b/Desktop/open-source/R/data_preprocessing/lasso.r
@@ -0,0 +1,20 @@
+data("diamonds", package = "ggplot2")
+library(caret)
+library(dplyr)
+dia.trans<-bind_cols(diamonds %>% select_if(is.numeric),
+ model.matrix(~cut-1,diamonds) %>% as_tibble(),
+ model.matrix(~color-1,diamonds) %>% as_tibble(),
+ model.matrix(~clarity-1,diamonds) %>% as_tibble())
+
+#setting parameters alpha and lambda
+lasso_expand<-expand.grid(alpha = 1, lambda = seq(0.001,0.1,by = 0.0005))
+lasso_mod <- train(x=dia.trans %>% select(-price), y=dia.trans$price, method='glmnet',
+ tuneGrid=lasso_expand)
+
+#best tune
+lasso_mod$bestTune
+lasso_mod$results$RMSE
+
+lasso_imp<-varImp(lasso_mod)
+#get the importance of each feature and eliminate some of them
+lasso_imp$importance
diff --git a/Desktop/open-source/R/data_structures/binary_search_tree.r b/Desktop/open-source/R/data_structures/binary_search_tree.r
new file mode 100644
index 00000000..d2743515
--- /dev/null
+++ b/Desktop/open-source/R/data_structures/binary_search_tree.r
@@ -0,0 +1,489 @@
+# Binary Search Tree (BST) Implementation
+#
+# A Binary Search Tree is a hierarchical data structure where each node has at most
+# two children (left and right), and for every node:
+# - All values in the left subtree are less than the node's value
+# - All values in the right subtree are greater than the node's value
+# - Both subtrees are also binary search trees
+#
+# Time Complexities (average case):
+# - Search: O(log n)
+# - Insert: O(log n)
+# - Delete: O(log n)
+# - Traversal: O(n)
+#
+# Worst case: O(n) when tree becomes skewed (like a linked list)
+#
+# Applications:
+# - Database indexing
+# - Expression parsing
+# - Priority queues
+# - File system organization
+
+# Define BST Node structure using R6 class system
+if (!require(R6, quietly = TRUE)) {
+ cat("Installing R6 package for object-oriented programming...\n")
+ install.packages("R6", quiet = TRUE)
+ library(R6)
+}
+
+# BST Node class
+BSTNode <- R6Class("BSTNode",
+ public = list(
+ value = NULL,
+ left = NULL,
+ right = NULL,
+
+ initialize = function(value) {
+ self$value <- value
+ self$left <- NULL
+ self$right <- NULL
+ }
+ )
+)
+
+# Binary Search Tree class
+BST <- R6Class("BST",
+ public = list(
+ root = NULL,
+ size = 0,
+
+ initialize = function() {
+ self$root <- NULL
+ self$size <- 0
+ },
+
+ # Insert a value into the BST
+ insert = function(value) {
+ if (is.null(self$root)) {
+ self$root <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(self$root, value)
+ }
+ },
+
+ # Search for a value in the BST
+ search = function(value) {
+ return(private$search_recursive(self$root, value))
+ },
+
+ # Delete a value from the BST
+ delete = function(value) {
+ if (self$search(value)) {
+ self$root <- private$delete_recursive(self$root, value)
+ self$size <- self$size - 1
+ return(TRUE)
+ }
+ return(FALSE)
+ },
+
+ # Find minimum value in the BST
+ find_min = function() {
+ if (is.null(self$root)) return(NULL)
+ return(private$find_min_recursive(self$root)$value)
+ },
+
+ # Find maximum value in the BST
+ find_max = function() {
+ if (is.null(self$root)) return(NULL)
+ return(private$find_max_recursive(self$root)$value)
+ },
+
+ # In-order traversal (left, root, right) - gives sorted output
+ inorder_traversal = function() {
+      # capture the helper's return value (it does not modify 'result' in place)
+      result <- private$inorder_recursive(self$root, c())
+ return(result)
+ },
+
+ # Pre-order traversal (root, left, right)
+ preorder_traversal = function() {
+      result <- private$preorder_recursive(self$root, c())
+ return(result)
+ },
+
+ # Post-order traversal (left, right, root)
+ postorder_traversal = function() {
+      result <- private$postorder_recursive(self$root, c())
+ return(result)
+ },
+
+ # Level-order traversal (breadth-first)
+ level_order_traversal = function() {
+ if (is.null(self$root)) return(c())
+
+ result <- c()
+ queue <- list(self$root)
+
+ while (length(queue) > 0) {
+ node <- queue[[1]]
+ queue <- queue[-1]
+
+ result <- c(result, node$value)
+
+ if (!is.null(node$left)) {
+ queue <- append(queue, list(node$left))
+ }
+ if (!is.null(node$right)) {
+ queue <- append(queue, list(node$right))
+ }
+ }
+
+ return(result)
+ },
+
+ # Get height of the tree
+ height = function() {
+ return(private$height_recursive(self$root))
+ },
+
+ # Check if tree is valid BST
+ is_valid_bst = function() {
+ return(private$is_valid_bst_recursive(self$root, -Inf, Inf))
+ },
+
+ # Get size of the tree
+ get_size = function() {
+ return(self$size)
+ },
+
+ # Check if tree is empty
+ is_empty = function() {
+ return(is.null(self$root))
+ },
+
+ # Print tree structure
+ print_tree = function() {
+ if (is.null(self$root)) {
+ cat("Empty tree\n")
+ return()
+ }
+ private$print_tree_recursive(self$root, "", TRUE)
+ }
+ ),
+
+ private = list(
+ # Recursive helper for insertion
+ insert_recursive = function(node, value) {
+ if (value < node$value) {
+ if (is.null(node$left)) {
+ node$left <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(node$left, value)
+ }
+ } else if (value > node$value) {
+ if (is.null(node$right)) {
+ node$right <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(node$right, value)
+ }
+ }
+ # If value == node$value, don't insert (no duplicates)
+ },
+
+ # Recursive helper for search
+ search_recursive = function(node, value) {
+ if (is.null(node) || node$value == value) {
+ return(!is.null(node))
+ }
+
+ if (value < node$value) {
+ return(private$search_recursive(node$left, value))
+ } else {
+ return(private$search_recursive(node$right, value))
+ }
+ },
+
+ # Recursive helper for deletion
+ delete_recursive = function(node, value) {
+ if (is.null(node)) {
+ return(NULL)
+ }
+
+ if (value < node$value) {
+ node$left <- private$delete_recursive(node$left, value)
+ } else if (value > node$value) {
+ node$right <- private$delete_recursive(node$right, value)
+ } else {
+ # Node to delete found
+ if (is.null(node$left)) {
+ return(node$right)
+ } else if (is.null(node$right)) {
+ return(node$left)
+ }
+
+ # Node has two children - find inorder successor
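+        # The in-order successor is the smallest value in the right subtree; it has no left
+        # child, so copying its value here and removing it from the right subtree keeps BST order.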
+ successor <- private$find_min_recursive(node$right)
+ node$value <- successor$value
+ node$right <- private$delete_recursive(node$right, successor$value)
+ }
+
+ return(node)
+ },
+
+ # Find minimum node
+ find_min_recursive = function(node) {
+ while (!is.null(node$left)) {
+ node <- node$left
+ }
+ return(node)
+ },
+
+ # Find maximum node
+ find_max_recursive = function(node) {
+ while (!is.null(node$right)) {
+ node <- node$right
+ }
+ return(node)
+ },
+
+    # In-order traversal helper: accumulates into `result` and returns the updated vector
+    inorder_recursive = function(node, result) {
+      if (!is.null(node)) {
+        result <- private$inorder_recursive(node$left, result)
+        result <- c(result, node$value)
+        result <- private$inorder_recursive(node$right, result)
+      }
+      return(result)
+    },
+
+    # Pre-order traversal helper
+    preorder_recursive = function(node, result) {
+      if (!is.null(node)) {
+        result <- c(result, node$value)
+        result <- private$preorder_recursive(node$left, result)
+        result <- private$preorder_recursive(node$right, result)
+      }
+      return(result)
+    },
+
+    # Post-order traversal helper
+    postorder_recursive = function(node, result) {
+      if (!is.null(node)) {
+        result <- private$postorder_recursive(node$left, result)
+        result <- private$postorder_recursive(node$right, result)
+        result <- c(result, node$value)
+      }
+      return(result)
+    },
+
+ # Height calculation helper
+ height_recursive = function(node) {
+ if (is.null(node)) {
+ return(-1) # Height of empty tree is -1
+ }
+
+ left_height <- private$height_recursive(node$left)
+ right_height <- private$height_recursive(node$right)
+
+ return(1 + max(left_height, right_height))
+ },
+
+ # BST validation helper
+ is_valid_bst_recursive = function(node, min_val, max_val) {
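+      # Each node must lie strictly within (min_val, max_val); the bounds tighten as we descend.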
+ if (is.null(node)) {
+ return(TRUE)
+ }
+
+ if (node$value <= min_val || node$value >= max_val) {
+ return(FALSE)
+ }
+
+ return(private$is_valid_bst_recursive(node$left, min_val, node$value) &&
+ private$is_valid_bst_recursive(node$right, node$value, max_val))
+ },
+
+ # Tree printing helper
+ print_tree_recursive = function(node, prefix, is_last) {
+ if (!is.null(node)) {
+ cat(prefix)
+ cat(if (is_last) "└── " else "├── ")
+ cat(node$value, "\n")
+
+ new_prefix <- paste0(prefix, if (is_last) " " else "│ ")
+
+ if (!is.null(node$left) || !is.null(node$right)) {
+ if (!is.null(node$left)) {
+ private$print_tree_recursive(node$left, new_prefix, is.null(node$right))
+ }
+ if (!is.null(node$right)) {
+ private$print_tree_recursive(node$right, new_prefix, TRUE)
+ }
+ }
+ }
+ }
+ )
+)
+
+# Utility functions for BST operations
+
+# Create BST from array
+create_bst_from_array <- function(arr) {
+ #' Create a BST from an array of values
+ #' @param arr: Array of values to insert
+ #' @return: BST object
+
+ bst <- BST$new()
+ for (value in arr) {
+ bst$insert(value)
+ }
+ return(bst)
+}
+
+# Check if two BSTs are identical
+are_identical_bsts <- function(bst1, bst2) {
+ #' Check if two BSTs have identical structure and values
+ #' @param bst1: First BST
+ #' @param bst2: Second BST
+ #' @return: TRUE if identical, FALSE otherwise
+
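+  # For BSTs without duplicate values, equal pre-order traversals imply identical
+  # structure and contents, since the sorted in-order sequence is fixed by the values.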
+ traversal1 <- bst1$preorder_traversal()
+ traversal2 <- bst2$preorder_traversal()
+
+ return(identical(traversal1, traversal2))
+}
+
+# Find kth smallest element in BST
+kth_smallest <- function(bst, k) {
+ #' Find the kth smallest element in BST
+ #' @param bst: BST object
+ #' @param k: Position (1-indexed)
+ #' @return: kth smallest value or NULL if k is out of bounds
+
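+  # Note: this builds the full in-order traversal, so it visits every node (O(n)) even for small k.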
+ inorder <- bst$inorder_traversal()
+ if (k > 0 && k <= length(inorder)) {
+ return(inorder[k])
+ }
+ return(NULL)
+}
+
+# Example usage and testing
+cat("=== Binary Search Tree (BST) Implementation ===\n\n")
+
+# Test 1: Basic BST operations
+cat("1. Basic BST Operations\n")
+bst <- BST$new()
+
+# Insert values
+values <- c(50, 30, 70, 20, 40, 60, 80)
+cat("Inserting values:", paste(values, collapse = ", "), "\n")
+
+for (value in values) {
+ bst$insert(value)
+}
+
+cat("Tree size:", bst$get_size(), "\n")
+cat("Tree height:", bst$height(), "\n")
+cat("Is valid BST:", bst$is_valid_bst(), "\n\n")
+
+# Test 2: Tree visualization
+cat("2. Tree Structure\n")
+bst$print_tree()
+cat("\n")
+
+# Test 3: Search operations
+cat("3. Search Operations\n")
+search_values <- c(40, 25, 80, 100)
+for (value in search_values) {
+ found <- bst$search(value)
+ cat("Search for", value, ":", if (found) "Found" else "Not found", "\n")
+}
+cat("\n")
+
+# Test 4: Min/Max operations
+cat("4. Min/Max Operations\n")
+cat("Minimum value:", bst$find_min(), "\n")
+cat("Maximum value:", bst$find_max(), "\n\n")
+
+# Test 5: Tree traversals
+cat("5. Tree Traversals\n")
+cat("In-order (sorted): ", paste(bst$inorder_traversal(), collapse = ", "), "\n")
+cat("Pre-order: ", paste(bst$preorder_traversal(), collapse = ", "), "\n")
+cat("Post-order: ", paste(bst$postorder_traversal(), collapse = ", "), "\n")
+cat("Level-order (BFS): ", paste(bst$level_order_traversal(), collapse = ", "), "\n\n")
+
+# Test 6: Deletion operations
+cat("6. Deletion Operations\n")
+delete_values <- c(20, 30, 50) # Delete leaf, node with one child, root
+
+for (value in delete_values) {
+ cat("Deleting", value, ":", if (bst$delete(value)) "Success" else "Failed", "\n")
+ cat("Tree after deletion:\n")
+ bst$print_tree()
+ cat("In-order traversal:", paste(bst$inorder_traversal(), collapse = ", "), "\n")
+ cat("Tree size:", bst$get_size(), "\n\n")
+}
+
+# Test 7: Create BST from sorted vs unsorted array
+cat("7. BST Creation Comparison\n")
+
+# Sorted array (creates skewed tree)
+sorted_array <- c(1, 2, 3, 4, 5, 6, 7)
+bst_sorted <- create_bst_from_array(sorted_array)
+cat("BST from sorted array [1,2,3,4,5,6,7]:\n")
+cat("Height:", bst_sorted$height(), "(should be close to log₂(7) ≈ 2.8 for balanced)\n")
+bst_sorted$print_tree()
+
+# Shuffled array (more balanced)
+shuffled_array <- c(4, 2, 6, 1, 3, 5, 7)
+bst_shuffled <- create_bst_from_array(shuffled_array)
+cat("BST from shuffled array [4,2,6,1,3,5,7]:\n")
+cat("Height:", bst_shuffled$height(), "\n")
+bst_shuffled$print_tree()
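+
+# Quick check of are_identical_bsts(): same values but different shapes are not identical
+cat("Sorted-array BST identical to shuffled-array BST:",
+    are_identical_bsts(bst_sorted, bst_shuffled), "\n")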
+
+# Test 8: kth smallest element
+cat("8. Finding kth Smallest Elements\n")
+for (k in 1:min(5, bst_shuffled$get_size())) {
+ kth_val <- kth_smallest(bst_shuffled, k)
+ cat(k, "smallest element:", kth_val, "\n")
+}
+cat("\n")
+
+# Test 9: Edge cases
+cat("9. Edge Cases\n")
+empty_bst <- BST$new()
+cat("Empty BST:\n")
+cat("Is empty:", empty_bst$is_empty(), "\n")
+cat("Size:", empty_bst$get_size(), "\n")
+cat("Height:", empty_bst$height(), "\n")
+cat("Min value:", empty_bst$find_min(), "\n")
+cat("Search for 5:", empty_bst$search(5), "\n")
+cat("Delete 5:", empty_bst$delete(5), "\n")
+
+# Single node BST
+single_bst <- BST$new()
+single_bst$insert(42)
+cat("\nSingle node BST:\n")
+cat("Size:", single_bst$get_size(), "\n")
+cat("Height:", single_bst$height(), "\n")
+cat("Min/Max:", single_bst$find_min(), "/", single_bst$find_max(), "\n")
+single_bst$print_tree()
+
+# Test 10: Real-world example - Student grades
+cat("\n10. Real-world Example - Student Grade Management\n")
+grade_bst <- BST$new()
+grades <- c(85, 92, 78, 96, 83, 88, 91, 79, 87, 94)
+
+cat("Student grades:", paste(grades, collapse = ", "), "\n")
+for (grade in grades) {
+ grade_bst$insert(grade)
+}
+
+cat("Grades in ascending order:", paste(grade_bst$inorder_traversal(), collapse = ", "), "\n")
+cat("Highest grade:", grade_bst$find_max(), "\n")
+cat("Lowest grade:", grade_bst$find_min(), "\n")
+cat("Median grade (middle element):", kth_smallest(grade_bst, ceiling(grade_bst$get_size()/2)), "\n")
+
+# Find grades at or above a certain threshold
+threshold <- 90
+above_threshold <- grade_bst$inorder_traversal()
+above_threshold <- above_threshold[above_threshold >= threshold]
+cat("Grades >=", threshold, ":", paste(above_threshold, collapse = ", "), "\n")
+
+cat("\nGrade distribution tree:\n")
+grade_bst$print_tree()
\ No newline at end of file
diff --git a/Desktop/open-source/R/documentation/ann.html b/Desktop/open-source/R/documentation/ann.html
new file mode 100644
index 00000000..93d6e584
--- /dev/null
+++ b/Desktop/open-source/R/documentation/ann.html
@@ -0,0 +1,170 @@
+<!-- Rendered HTML version of documentation/ann.md; the content mirrors the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/ann.md b/Desktop/open-source/R/documentation/ann.md
new file mode 100644
index 00000000..4b578d47
--- /dev/null
+++ b/Desktop/open-source/R/documentation/ann.md
@@ -0,0 +1,98 @@
+
+
+``` r
+library(neuralnet)
+```
+
+```
+## Error in library(neuralnet): there is no package called 'neuralnet'
+```
+
+``` r
+concrete<-read.csv(file = "concrete.txt",stringsAsFactors = F)#get the data
+```
+
+```
+## Warning in file(file, "rt"): cannot open file 'concrete.txt': No such file or
+## directory
+```
+
+```
+## Error in file(file, "rt"): cannot open the connection
+```
+
+``` r
+normalize<-function(x){
+ return((x-min(x))/(max(x)-min(x)))
+}
+concrete<-as.data.frame(lapply(concrete, normalize))
+```
+
+```
+## Error in lapply(concrete, normalize): object 'concrete' not found
+```
+
+``` r
+concrete_train<-concrete[1:773,]
+```
+
+```
+## Error: object 'concrete' not found
+```
+
+``` r
+concrete_test<-concrete[774:1030,]
+```
+
+```
+## Error: object 'concrete' not found
+```
+
+``` r
+concrete_model<-neuralnet(strength~cement+slag+ash+water+superplastic+coarseagg+fineagg+age,data = concrete_train,hidden = 5)
+```
+
+```
+## Error in neuralnet(strength ~ cement + slag + ash + water + superplastic + : could not find function "neuralnet"
+```
+
+``` r
+model_res<-compute(concrete_model,concrete_test[,1:8])
+```
+
+```
+## Error in compute(concrete_model, concrete_test[, 1:8]): could not find function "compute"
+```
+
+``` r
+x=model_res$net.result
+```
+
+```
+## Error: object 'model_res' not found
+```
+
+``` r
+y=concrete_test$strength
+```
+
+```
+## Error: object 'concrete_test' not found
+```
+
+``` r
+cor(x,y)
+```
+
+```
+## Error in is.data.frame(y): object 'y' not found
+```
+
+``` r
+plot(concrete_model)
+```
+
+```
+## Error in plot(concrete_model): object 'concrete_model' not found
+```
+
diff --git a/Desktop/open-source/R/documentation/apriori.html b/Desktop/open-source/R/documentation/apriori.html
new file mode 100644
index 00000000..e2489b12
--- /dev/null
+++ b/Desktop/open-source/R/documentation/apriori.html
@@ -0,0 +1,153 @@
+<!-- Rendered HTML version of documentation/apriori.md; the content mirrors the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/apriori.md b/Desktop/open-source/R/documentation/apriori.md
new file mode 100644
index 00000000..e595594f
--- /dev/null
+++ b/Desktop/open-source/R/documentation/apriori.md
@@ -0,0 +1,67 @@
+
+
+``` r
+library(arules)
+```
+
+```
+## Error in library(arules): there is no package called 'arules'
+```
+
+``` r
+groceries <- read.transactions("groceries.csv", sep=",")
+```
+
+```
+## Error in read.transactions("groceries.csv", sep = ","): could not find function "read.transactions"
+```
+
+``` r
+summary(groceries)
+```
+
+```
+## Error in summary(groceries): object 'groceries' not found
+```
+
+``` r
+itemFrequencyPlot(groceries, topN=20)
+```
+
+```
+## Error in itemFrequencyPlot(groceries, topN = 20): could not find function "itemFrequencyPlot"
+```
+
+``` r
+# sample() for randomly extracting transactions, image() for visualising the sparse matrix
+image(sample(groceries,100))
+```
+
+```
+## Error in sample(groceries, 100): object 'groceries' not found
+```
+
+``` r
+groceries_rule <- apriori(data=groceries, parameter=list(support=0.006, confidence=0.25, minlen=2))
+```
+
+```
+## Error in apriori(data = groceries, parameter = list(support = 0.006, confidence = 0.25, : could not find function "apriori"
+```
+
+``` r
+plotly_arules(groceries_rule)
+```
+
+```
+## Error in plotly_arules(groceries_rule): could not find function "plotly_arules"
+```
+
+``` r
+summary(groceries_rule)
+```
+
+```
+## Error in summary(groceries_rule): object 'groceries_rule' not found
+```
+
diff --git a/Desktop/open-source/R/documentation/binary_search.html b/Desktop/open-source/R/documentation/binary_search.html
new file mode 100644
index 00000000..62834b3a
--- /dev/null
+++ b/Desktop/open-source/R/documentation/binary_search.html
@@ -0,0 +1,151 @@
+<!-- Rendered HTML version of documentation/binary_search.md; the content mirrors the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/binary_search.md b/Desktop/open-source/R/documentation/binary_search.md
new file mode 100644
index 00000000..3b34a8e5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/binary_search.md
@@ -0,0 +1,37 @@
+
+
+``` r
+binary_search <- function(arr, target) { #function for finding position of value
+ low <- 1
+ high <- length(arr)
+
+ while (low <= high) {
+ mid <- low + (high - low) %/% 2 #finding mid of array
+
+    if (arr[mid] == target) { # comparing the mid value with the target value
+ return(mid) # Target found, return its index
+ } else if (arr[mid] < target) {
+ low <- mid + 1 # Search in the right half
+ } else {
+ high <- mid - 1 # Search in the left half
+ }
+ }
+ return(-1) # Target not found in the array
+}
+
+arr <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) #input array (hard code)
+target <- 7 #input value to be searched (hard code)
+
+result <- binary_search(arr, target) # call the binary_search function
+
+if (result == -1) {
+ cat("Element", target, "not found in the array.\n")
+} else {
+ cat("Element", target, "found at position", result, ".\n")
+}
+```
+
+```
+## Element 7 found at position 7 .
+```
+
diff --git a/Desktop/open-source/R/documentation/binary_search_tree.html b/Desktop/open-source/R/documentation/binary_search_tree.html
new file mode 100644
index 00000000..25464924
--- /dev/null
+++ b/Desktop/open-source/R/documentation/binary_search_tree.html
@@ -0,0 +1,819 @@
+<!-- Rendered HTML version of documentation/binary_search_tree.md; the content mirrors the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/binary_search_tree.md b/Desktop/open-source/R/documentation/binary_search_tree.md
new file mode 100644
index 00000000..38413f75
--- /dev/null
+++ b/Desktop/open-source/R/documentation/binary_search_tree.md
@@ -0,0 +1,917 @@
+
+
+``` r
+# Binary Search Tree (BST) Implementation
+#
+# A Binary Search Tree is a hierarchical data structure where each node has at most
+# two children (left and right), and for every node:
+# - All values in the left subtree are less than the node's value
+# - All values in the right subtree are greater than the node's value
+# - Both subtrees are also binary search trees
+#
+# Time Complexities (average case):
+# - Search: O(log n)
+# - Insert: O(log n)
+# - Delete: O(log n)
+# - Traversal: O(n)
+#
+# Worst case: O(n) when tree becomes skewed (like a linked list)
+#
+# Applications:
+# - Database indexing
+# - Expression parsing
+# - Priority queues
+# - File system organization
+
+# Define BST Node structure using R6 class system
+if (!require(R6, quietly = TRUE)) {
+ cat("Installing R6 package for object-oriented programming...\n")
+ install.packages("R6", quiet = TRUE)
+ library(R6)
+}
+```
+
+```
+## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
+## logical.return = TRUE, : there is no package called 'R6'
+```
+
+```
+## Installing R6 package for object-oriented programming...
+```
+
+``` r
+# BST Node class
+BSTNode <- R6Class("BSTNode",
+ public = list(
+ value = NULL,
+ left = NULL,
+ right = NULL,
+
+ initialize = function(value) {
+ self$value <- value
+ self$left <- NULL
+ self$right <- NULL
+ }
+ )
+)
+
+# Binary Search Tree class
+BST <- R6Class("BST",
+ public = list(
+ root = NULL,
+ size = 0,
+
+ initialize = function() {
+ self$root <- NULL
+ self$size <- 0
+ },
+
+ # Insert a value into the BST
+ insert = function(value) {
+ if (is.null(self$root)) {
+ self$root <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(self$root, value)
+ }
+ },
+
+ # Search for a value in the BST
+ search = function(value) {
+ return(private$search_recursive(self$root, value))
+ },
+
+ # Delete a value from the BST
+ delete = function(value) {
+ if (self$search(value)) {
+ self$root <- private$delete_recursive(self$root, value)
+ self$size <- self$size - 1
+ return(TRUE)
+ }
+ return(FALSE)
+ },
+
+ # Find minimum value in the BST
+ find_min = function() {
+ if (is.null(self$root)) return(NULL)
+ return(private$find_min_recursive(self$root)$value)
+ },
+
+ # Find maximum value in the BST
+ find_max = function() {
+ if (is.null(self$root)) return(NULL)
+ return(private$find_max_recursive(self$root)$value)
+ },
+
+ # In-order traversal (left, root, right) - gives sorted output
+ inorder_traversal = function() {
+ result <- c()
+ private$inorder_recursive(self$root, result)
+ return(result)
+ },
+
+ # Pre-order traversal (root, left, right)
+ preorder_traversal = function() {
+ result <- c()
+ private$preorder_recursive(self$root, result)
+ return(result)
+ },
+
+ # Post-order traversal (left, right, root)
+ postorder_traversal = function() {
+ result <- c()
+ private$postorder_recursive(self$root, result)
+ return(result)
+ },
+
+ # Level-order traversal (breadth-first)
+ level_order_traversal = function() {
+ if (is.null(self$root)) return(c())
+
+ result <- c()
+ queue <- list(self$root)
+
+ while (length(queue) > 0) {
+ node <- queue[[1]]
+ queue <- queue[-1]
+
+ result <- c(result, node$value)
+
+ if (!is.null(node$left)) {
+ queue <- append(queue, list(node$left))
+ }
+ if (!is.null(node$right)) {
+ queue <- append(queue, list(node$right))
+ }
+ }
+
+ return(result)
+ },
+
+ # Get height of the tree
+ height = function() {
+ return(private$height_recursive(self$root))
+ },
+
+ # Check if tree is valid BST
+ is_valid_bst = function() {
+ return(private$is_valid_bst_recursive(self$root, -Inf, Inf))
+ },
+
+ # Get size of the tree
+ get_size = function() {
+ return(self$size)
+ },
+
+ # Check if tree is empty
+ is_empty = function() {
+ return(is.null(self$root))
+ },
+
+ # Print tree structure
+ print_tree = function() {
+ if (is.null(self$root)) {
+ cat("Empty tree\n")
+ return()
+ }
+ private$print_tree_recursive(self$root, "", TRUE)
+ }
+ ),
+
+ private = list(
+ # Recursive helper for insertion
+ insert_recursive = function(node, value) {
+ if (value < node$value) {
+ if (is.null(node$left)) {
+ node$left <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(node$left, value)
+ }
+ } else if (value > node$value) {
+ if (is.null(node$right)) {
+ node$right <- BSTNode$new(value)
+ self$size <- self$size + 1
+ } else {
+ private$insert_recursive(node$right, value)
+ }
+ }
+ # If value == node$value, don't insert (no duplicates)
+ },
+
+ # Recursive helper for search
+ search_recursive = function(node, value) {
+ if (is.null(node) || node$value == value) {
+ return(!is.null(node))
+ }
+
+ if (value < node$value) {
+ return(private$search_recursive(node$left, value))
+ } else {
+ return(private$search_recursive(node$right, value))
+ }
+ },
+
+ # Recursive helper for deletion
+ delete_recursive = function(node, value) {
+ if (is.null(node)) {
+ return(NULL)
+ }
+
+ if (value < node$value) {
+ node$left <- private$delete_recursive(node$left, value)
+ } else if (value > node$value) {
+ node$right <- private$delete_recursive(node$right, value)
+ } else {
+ # Node to delete found
+ if (is.null(node$left)) {
+ return(node$right)
+ } else if (is.null(node$right)) {
+ return(node$left)
+ }
+
+ # Node has two children - find inorder successor
+ successor <- private$find_min_recursive(node$right)
+ node$value <- successor$value
+ node$right <- private$delete_recursive(node$right, successor$value)
+ }
+
+ return(node)
+ },
+
+ # Find minimum node
+ find_min_recursive = function(node) {
+ while (!is.null(node$left)) {
+ node <- node$left
+ }
+ return(node)
+ },
+
+ # Find maximum node
+ find_max_recursive = function(node) {
+ while (!is.null(node$right)) {
+ node <- node$right
+ }
+ return(node)
+ },
+
+ # In-order traversal helper
+ inorder_recursive = function(node, result) {
+ if (!is.null(node)) {
+ result <<- private$inorder_recursive(node$left, result)
+ result <<- c(result, node$value)
+ result <<- private$inorder_recursive(node$right, result)
+ }
+ return(result)
+ },
+
+ # Pre-order traversal helper
+ preorder_recursive = function(node, result) {
+ if (!is.null(node)) {
+ result <<- c(result, node$value)
+ result <<- private$preorder_recursive(node$left, result)
+ result <<- private$preorder_recursive(node$right, result)
+ }
+ return(result)
+ },
+
+ # Post-order traversal helper
+ postorder_recursive = function(node, result) {
+ if (!is.null(node)) {
+ result <<- private$postorder_recursive(node$left, result)
+ result <<- private$postorder_recursive(node$right, result)
+ result <<- c(result, node$value)
+ }
+ return(result)
+ },
+
+ # Height calculation helper
+ height_recursive = function(node) {
+ if (is.null(node)) {
+ return(-1) # Height of empty tree is -1
+ }
+
+ left_height <- private$height_recursive(node$left)
+ right_height <- private$height_recursive(node$right)
+
+ return(1 + max(left_height, right_height))
+ },
+
+ # BST validation helper
+ is_valid_bst_recursive = function(node, min_val, max_val) {
+ if (is.null(node)) {
+ return(TRUE)
+ }
+
+ if (node$value <= min_val || node$value >= max_val) {
+ return(FALSE)
+ }
+
+ return(private$is_valid_bst_recursive(node$left, min_val, node$value) &&
+ private$is_valid_bst_recursive(node$right, node$value, max_val))
+ },
+
+ # Tree printing helper
+ print_tree_recursive = function(node, prefix, is_last) {
+ if (!is.null(node)) {
+ cat(prefix)
+ cat(if (is_last) "└── " else "├── ")
+ cat(node$value, "\n")
+
+ new_prefix <- paste0(prefix, if (is_last) " " else "│ ")
+
+ if (!is.null(node$left) || !is.null(node$right)) {
+ if (!is.null(node$left)) {
+ private$print_tree_recursive(node$left, new_prefix, is.null(node$right))
+ }
+ if (!is.null(node$right)) {
+ private$print_tree_recursive(node$right, new_prefix, TRUE)
+ }
+ }
+ }
+ }
+ )
+)
+
+# Utility functions for BST operations
+
+# Create BST from array
+create_bst_from_array <- function(arr) {
+ #' Create a BST from an array of values
+ #' @param arr: Array of values to insert
+ #' @return: BST object
+
+ bst <- BST$new()
+ for (value in arr) {
+ bst$insert(value)
+ }
+ return(bst)
+}
+
+# Check if two BSTs are identical
+are_identical_bsts <- function(bst1, bst2) {
+ #' Check if two BSTs have identical structure and values
+ #' @param bst1: First BST
+ #' @param bst2: Second BST
+ #' @return: TRUE if identical, FALSE otherwise
+
+ traversal1 <- bst1$preorder_traversal()
+ traversal2 <- bst2$preorder_traversal()
+
+ return(identical(traversal1, traversal2))
+}
+
+# Find kth smallest element in BST
+kth_smallest <- function(bst, k) {
+ #' Find the kth smallest element in BST
+ #' @param bst: BST object
+ #' @param k: Position (1-indexed)
+ #' @return: kth smallest value or NULL if k is out of bounds
+
+ inorder <- bst$inorder_traversal()
+ if (k > 0 && k <= length(inorder)) {
+ return(inorder[k])
+ }
+ return(NULL)
+}
+
+# Example usage and testing
+cat("=== Binary Search Tree (BST) Implementation ===\n\n")
+```
+
+```
+## === Binary Search Tree (BST) Implementation ===
+```
+
+``` r
+# Test 1: Basic BST operations
+cat("1. Basic BST Operations\n")
+```
+
+```
+## 1. Basic BST Operations
+```
+
+``` r
+bst <- BST$new()
+
+# Insert values
+values <- c(50, 30, 70, 20, 40, 60, 80)
+cat("Inserting values:", paste(values, collapse = ", "), "\n")
+```
+
+```
+## Inserting values: 50, 30, 70, 20, 40, 60, 80
+```
+
+``` r
+for (value in values) {
+ bst$insert(value)
+}
+
+cat("Tree size:", bst$get_size(), "\n")
+```
+
+```
+## Tree size: 7
+```
+
+``` r
+cat("Tree height:", bst$height(), "\n")
+```
+
+```
+## Tree height: 2
+```
+
+``` r
+cat("Is valid BST:", bst$is_valid_bst(), "\n\n")
+```
+
+```
+## Is valid BST: TRUE
+```
+
+``` r
+# Test 2: Tree visualization
+cat("2. Tree Structure\n")
+```
+
+```
+## 2. Tree Structure
+```
+
+``` r
+bst$print_tree()
+```
+
+```
+## └── 50
+## ├── 30
+## │ ├── 20
+## │ └── 40
+## └── 70
+## ├── 60
+## └── 80
+```
+
+``` r
+cat("\n")
+```
+
+``` r
+# Test 3: Search operations
+cat("3. Search Operations\n")
+```
+
+```
+## 3. Search Operations
+```
+
+``` r
+search_values <- c(40, 25, 80, 100)
+for (value in search_values) {
+ found <- bst$search(value)
+ cat("Search for", value, ":", if (found) "Found" else "Not found", "\n")
+}
+```
+
+```
+## Search for 40 : Found
+## Search for 25 : Not found
+## Search for 80 : Found
+## Search for 100 : Not found
+```
+
+``` r
+cat("\n")
+```
+
+``` r
+# Test 4: Min/Max operations
+cat("4. Min/Max Operations\n")
+```
+
+```
+## 4. Min/Max Operations
+```
+
+``` r
+cat("Minimum value:", bst$find_min(), "\n")
+```
+
+```
+## Minimum value: 20
+```
+
+``` r
+cat("Maximum value:", bst$find_max(), "\n\n")
+```
+
+```
+## Maximum value: 80
+```
+
+``` r
+# Test 5: Tree traversals
+cat("5. Tree Traversals\n")
+```
+
+```
+## 5. Tree Traversals
+```
+
+``` r
+cat("In-order (sorted): ", paste(bst$inorder_traversal(), collapse = ", "), "\n")
+```
+
+```
+## In-order (sorted):
+```
+
+``` r
+cat("Pre-order: ", paste(bst$preorder_traversal(), collapse = ", "), "\n")
+```
+
+```
+## Pre-order:
+```
+
+``` r
+cat("Post-order: ", paste(bst$postorder_traversal(), collapse = ", "), "\n")
+```
+
+```
+## Post-order:
+```
+
+``` r
+cat("Level-order (BFS): ", paste(bst$level_order_traversal(), collapse = ", "), "\n\n")
+```
+
+```
+## Level-order (BFS): 50, 30, 70, 20, 40, 60, 80
+```
+
+``` r
+# Test 6: Deletion operations
+cat("6. Deletion Operations\n")
+```
+
+```
+## 6. Deletion Operations
+```
+
+``` r
+delete_values <- c(20, 30, 50) # Delete leaf, node with one child, root
+
+for (value in delete_values) {
+ cat("Deleting", value, ":", if (bst$delete(value)) "Success" else "Failed", "\n")
+ cat("Tree after deletion:\n")
+ bst$print_tree()
+ cat("In-order traversal:", paste(bst$inorder_traversal(), collapse = ", "), "\n")
+ cat("Tree size:", bst$get_size(), "\n\n")
+}
+```
+
+```
+## Deleting 20 : Success
+## Tree after deletion:
+## └── 50
+## ├── 30
+## │ └── 40
+## └── 70
+## ├── 60
+## └── 80
+## In-order traversal:
+## Tree size: 6
+##
+## Deleting 30 : Success
+## Tree after deletion:
+## └── 50
+## ├── 40
+## └── 70
+## ├── 60
+## └── 80
+## In-order traversal:
+## Tree size: 5
+##
+## Deleting 50 : Success
+## Tree after deletion:
+## └── 60
+## ├── 40
+## └── 70
+## └── 80
+## In-order traversal:
+## Tree size: 4
+```
+
+``` r
+# Test 7: Create BST from sorted vs unsorted array
+cat("7. BST Creation Comparison\n")
+```
+
+```
+## 7. BST Creation Comparison
+```
+
+``` r
+# Sorted array (creates skewed tree)
+sorted_array <- c(1, 2, 3, 4, 5, 6, 7)
+bst_sorted <- create_bst_from_array(sorted_array)
+cat("BST from sorted array [1,2,3,4,5,6,7]:\n")
+```
+
+```
+## BST from sorted array [1,2,3,4,5,6,7]:
+```
+
+``` r
+cat("Height:", bst_sorted$height(), "(should be close to log₂(7) ≈ 2.8 for balanced)\n")
+```
+
+```
+## Height: 6 (should be close to log₂(7) ≈ 2.8 for balanced)
+```
+
+``` r
+bst_sorted$print_tree()
+```
+
+```
+## └── 1
+## └── 2
+## └── 3
+## └── 4
+## └── 5
+## └── 6
+## └── 7
+```
+
+``` r
+# Shuffled array (more balanced)
+shuffled_array <- c(4, 2, 6, 1, 3, 5, 7)
+bst_shuffled <- create_bst_from_array(shuffled_array)
+cat("BST from shuffled array [4,2,6,1,3,5,7]:\n")
+```
+
+```
+## BST from shuffled array [4,2,6,1,3,5,7]:
+```
+
+``` r
+cat("Height:", bst_shuffled$height(), "\n")
+```
+
+```
+## Height: 2
+```
+
+``` r
+bst_shuffled$print_tree()
+```
+
+```
+## └── 4
+## ├── 2
+## │ ├── 1
+## │ └── 3
+## └── 6
+## ├── 5
+## └── 7
+```
+
+``` r
+# Test 8: kth smallest element
+cat("8. Finding kth Smallest Elements\n")
+```
+
+```
+## 8. Finding kth Smallest Elements
+```
+
+``` r
+for (k in 1:min(5, bst_shuffled$get_size())) {
+ kth_val <- kth_smallest(bst_shuffled, k)
+ cat(k, "smallest element:", kth_val, "\n")
+}
+```
+
+```
+## 1 smallest element:
+## 2 smallest element:
+## 3 smallest element:
+## 4 smallest element:
+## 5 smallest element:
+```
+
+``` r
+cat("\n")
+```
+
+``` r
+# Test 9: Edge cases
+cat("9. Edge Cases\n")
+```
+
+```
+## 9. Edge Cases
+```
+
+``` r
+empty_bst <- BST$new()
+cat("Empty BST:\n")
+```
+
+```
+## Empty BST:
+```
+
+``` r
+cat("Is empty:", empty_bst$is_empty(), "\n")
+```
+
+```
+## Is empty: TRUE
+```
+
+``` r
+cat("Size:", empty_bst$get_size(), "\n")
+```
+
+```
+## Size: 0
+```
+
+``` r
+cat("Height:", empty_bst$height(), "\n")
+```
+
+```
+## Height: -1
+```
+
+``` r
+cat("Min value:", empty_bst$find_min(), "\n")
+```
+
+```
+## Min value:
+```
+
+``` r
+cat("Search for 5:", empty_bst$search(5), "\n")
+```
+
+```
+## Search for 5: FALSE
+```
+
+``` r
+cat("Delete 5:", empty_bst$delete(5), "\n")
+```
+
+```
+## Delete 5: FALSE
+```
+
+``` r
+# Single node BST
+single_bst <- BST$new()
+single_bst$insert(42)
+cat("\nSingle node BST:\n")
+```
+
+```
+##
+## Single node BST:
+```
+
+``` r
+cat("Size:", single_bst$get_size(), "\n")
+```
+
+```
+## Size: 1
+```
+
+``` r
+cat("Height:", single_bst$height(), "\n")
+```
+
+```
+## Height: 0
+```
+
+``` r
+cat("Min/Max:", single_bst$find_min(), "/", single_bst$find_max(), "\n")
+```
+
+```
+## Min/Max: 42 / 42
+```
+
+``` r
+single_bst$print_tree()
+```
+
+```
+## └── 42
+```
+
+``` r
+# Test 10: Real-world example - Student grades
+cat("\n10. Real-world Example - Student Grade Management\n")
+```
+
+```
+##
+## 10. Real-world Example - Student Grade Management
+```
+
+``` r
+grade_bst <- BST$new()
+grades <- c(85, 92, 78, 96, 83, 88, 91, 79, 87, 94)
+
+cat("Student grades:", paste(grades, collapse = ", "), "\n")
+```
+
+```
+## Student grades: 85, 92, 78, 96, 83, 88, 91, 79, 87, 94
+```
+
+``` r
+for (grade in grades) {
+ grade_bst$insert(grade)
+}
+
+cat("Grades in ascending order:", paste(grade_bst$inorder_traversal(), collapse = ", "), "\n")
+```
+
+```
+## Grades in ascending order:
+```
+
+``` r
+cat("Highest grade:", grade_bst$find_max(), "\n")
+```
+
+```
+## Highest grade: 96
+```
+
+``` r
+cat("Lowest grade:", grade_bst$find_min(), "\n")
+```
+
+```
+## Lowest grade: 78
+```
+
+``` r
+cat("Median grade (middle element):", kth_smallest(grade_bst, ceiling(grade_bst$get_size()/2)), "\n")
+```
+
+```
+## Median grade (middle element):
+```
+
+``` r
+# Find students above certain grade
+threshold <- 90
+above_threshold <- grade_bst$inorder_traversal()
+above_threshold <- above_threshold[above_threshold >= threshold]
+cat("Grades >=", threshold, ":", paste(above_threshold, collapse = ", "), "\n")
+```
+
+```
+## Grades >= 90 :
+```
+
+``` r
+cat("\nGrade distribution tree:\n")
+```
+
+```
+##
+## Grade distribution tree:
+```
+
+``` r
+grade_bst$print_tree()
+```
+
+```
+## └── 85
+## ├── 78
+## │ └── 83
+## │ └── 79
+## └── 92
+## ├── 88
+## │ ├── 87
+## │ └── 91
+## └── 96
+## └── 94
+```
+
diff --git a/Desktop/open-source/R/documentation/breadth_first_search.html b/Desktop/open-source/R/documentation/breadth_first_search.html
new file mode 100644
index 00000000..415f98ed
--- /dev/null
+++ b/Desktop/open-source/R/documentation/breadth_first_search.html
@@ -0,0 +1,387 @@
+<!-- Rendered HTML version of documentation/breadth_first_search.md; the content mirrors the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/breadth_first_search.md b/Desktop/open-source/R/documentation/breadth_first_search.md
new file mode 100644
index 00000000..1de85dc0
--- /dev/null
+++ b/Desktop/open-source/R/documentation/breadth_first_search.md
@@ -0,0 +1,329 @@
+
+
+``` r
+# Breadth-First Search (BFS) Algorithm
+#
+# BFS is a graph traversal algorithm that explores all vertices at the current depth
+# before moving to vertices at the next depth level. It uses a queue data structure.
+#
+# Time Complexity: O(V + E) where V is vertices and E is edges
+# Space Complexity: O(V) for the visited array and queue
+#
+# Input: An adjacency list representation of a graph and a starting vertex
+# Output: The order in which vertices are visited during BFS traversal
+
+# BFS function using queue (implemented with vector)
+breadth_first_search <- function(graph, start_vertex) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))  # numeric vertex ids so max() sizes visited correctly
+
+ # Initialize visited array and queue
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ queue <- c(start_vertex)
+ result <- c()
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex from front of queue
+ vertex <- queue[1]
+ queue <- queue[-1]
+ result <- c(result, vertex)
+
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ queue <- c(queue, neighbor)
+ }
+ }
+ }
+ }
+
+ return(result)
+}
+
+# BFS to find shortest path between two vertices
+bfs_shortest_path <- function(graph, start_vertex, end_vertex) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))  # numeric vertex ids so max() sizes visited correctly
+
+ # Initialize visited array, queue, and parent tracking
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ parent <- rep(-1, max(all_vertices))
+ names(parent) <- 1:max(all_vertices)
+ queue <- c(start_vertex)
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex from front of queue
+ vertex <- queue[1]
+ queue <- queue[-1]
+
+ # If we reached the target vertex, reconstruct path
+ if (vertex == end_vertex) {
+ path <- c()
+ current <- end_vertex
+
+ # Backtrack using parent array
+ while (current != -1) {
+ path <- c(current, path)
+ current <- parent[current]
+ }
+
+ return(list(
+ path = path,
+ distance = length(path) - 1
+ ))
+ }
+
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ parent[neighbor] <- vertex
+ queue <- c(queue, neighbor)
+ }
+ }
+ }
+ }
+
+ # No path found
+ return(list(
+ path = NULL,
+ distance = -1
+ ))
+}
+
+# BFS to find all vertices at a specific distance
+bfs_vertices_at_distance <- function(graph, start_vertex, target_distance) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))  # numeric vertex ids so max() sizes visited correctly
+
+ # Initialize visited array, queue with distances
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ queue <- list(list(vertex = start_vertex, distance = 0))
+ vertices_at_distance <- c()
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex with distance from front of queue
+ current <- queue[[1]]
+ queue <- queue[-1]
+ vertex <- current$vertex
+ distance <- current$distance
+
+ # If current distance matches target, add to result
+ if (distance == target_distance) {
+ vertices_at_distance <- c(vertices_at_distance, vertex)
+ }
+
+ # Don't explore further if we've reached target distance
+ if (distance < target_distance) {
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ queue <- c(queue, list(list(vertex = neighbor, distance = distance + 1)))
+ }
+ }
+ }
+ }
+ }
+
+ return(vertices_at_distance)
+}
+
+# Example usage and testing
+cat("=== Breadth-First Search (BFS) Algorithm ===\n")
+```
+
+```
+## === Breadth-First Search (BFS) Algorithm ===
+```
+
+``` r
+# Create a sample graph as adjacency list
+# Graph structure:
+# 1
+# / \
+# 2 3
+# / \ \
+# 4 5 6
+graph <- list(
+ "1" = c(2, 3),
+ "2" = c(4, 5),
+ "3" = c(6),
+ "4" = c(),
+ "5" = c(),
+ "6" = c()
+)
+
+cat("Graph structure (adjacency list):\n")
+```
+
+```
+## Graph structure (adjacency list):
+```
+
+``` r
+for (vertex in names(graph)) {
+ cat("Vertex", vertex, "-> [", paste(graph[[vertex]], collapse = ", "), "]\n")
+}
+```
+
+```
+## Vertex 1 -> [ 2, 3 ]
+## Vertex 2 -> [ 4, 5 ]
+## Vertex 3 -> [ 6 ]
+## Vertex 4 -> [ ]
+## Vertex 5 -> [ ]
+## Vertex 6 -> [ ]
+```
+
+``` r
+# Test BFS traversal
+cat("\nBFS starting from vertex 1:\n")
+```
+
+```
+##
+## BFS starting from vertex 1:
+```
+
+``` r
+result <- breadth_first_search(graph, 1)
+cat("Traversal order:", paste(result, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 1 -> 2 -> 3 -> 4 -> 5 -> 6
+```
+
+``` r
+# Test BFS from different starting vertex
+cat("\nBFS starting from vertex 2:\n")
+```
+
+```
+##
+## BFS starting from vertex 2:
+```
+
+``` r
+result_from_2 <- breadth_first_search(graph, 2)
+cat("Traversal order:", paste(result_from_2, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 2 -> 4 -> 5
+```
+
+``` r
+# Test shortest path finding
+cat("\n=== Shortest Path Finding ===\n")
+```
+
+```
+##
+## === Shortest Path Finding ===
+```
+
+``` r
+path_result <- bfs_shortest_path(graph, 1, 5)
+if (!is.null(path_result$path)) {
+ cat("Shortest path from 1 to 5:", paste(path_result$path, collapse = " -> "), "\n")
+ cat("Distance:", path_result$distance, "\n")
+} else {
+ cat("No path found from 1 to 5\n")
+}
+```
+
+```
+## Shortest path from 1 to 5: 1 -> 2 -> 5
+## Distance: 2
+```
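+
+A hedged follow-up sketch (not part of the original test set): the `else` branch above
+only fires when the target is unreachable. In this directed adjacency list vertex 4 has
+no outgoing edges, so searching from 4 back to 1 is expected to return a `NULL` path and
+a distance of -1.
+
+``` r
+# Unreachable target: vertex 4 has no outgoing edges in this directed graph
+path_none <- bfs_shortest_path(graph, 4, 1)
+if (!is.null(path_none$path)) {
+  cat("Shortest path from 4 to 1:", paste(path_none$path, collapse = " -> "), "\n")
+} else {
+  cat("No path found from 4 to 1; returned distance:", path_none$distance, "\n")
+}
+```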
+
+``` r
+# Test vertices at specific distance
+cat("\n=== Vertices at Specific Distance ===\n")
+```
+
+```
+##
+## === Vertices at Specific Distance ===
+```
+
+``` r
+vertices_dist_2 <- bfs_vertices_at_distance(graph, 1, 2)
+cat("Vertices at distance 2 from vertex 1:", paste(vertices_dist_2, collapse = ", "), "\n")
+```
+
+```
+## Vertices at distance 2 from vertex 1: 4, 5, 6
+```
+
+``` r
+# Example with a more complex connected graph
+cat("\n=== Example with More Complex Graph ===\n")
+```
+
+```
+##
+## === Example with More Complex Graph ===
+```
+
+``` r
+complex_graph <- list(
+ "1" = c(2, 3),
+ "2" = c(1, 4, 5),
+ "3" = c(1, 6),
+ "4" = c(2, 7),
+ "5" = c(2, 8),
+ "6" = c(3, 9),
+ "7" = c(4),
+ "8" = c(5),
+ "9" = c(6)
+)
+
+cat("Complex graph BFS starting from vertex 1:\n")
+```
+
+```
+## Complex graph BFS starting from vertex 1:
+```
+
+``` r
+complex_result <- breadth_first_search(complex_graph, 1)
+cat("Traversal order:", paste(complex_result, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9
+```
+
+``` r
+# Test shortest path in complex graph
+path_complex <- bfs_shortest_path(complex_graph, 1, 9)
+if (!is.null(path_complex$path)) {
+ cat("Shortest path from 1 to 9:", paste(path_complex$path, collapse = " -> "), "\n")
+ cat("Distance:", path_complex$distance, "\n")
+}
+```
+
+```
+## Shortest path from 1 to 9: 1 -> 3 -> 6 -> 9
+## Distance: 3
+```
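+
+A further hedged example (not produced by the original knit): applying
+`bfs_vertices_at_distance` to the larger graph should report the three leaves that lie
+exactly three edges away from vertex 1.
+
+``` r
+# Expected result for this graph: vertices 7, 8 and 9
+vertices_dist_3 <- bfs_vertices_at_distance(complex_graph, 1, 3)
+cat("Vertices at distance 3 from vertex 1:", paste(vertices_dist_3, collapse = ", "), "\n")
+```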
+
diff --git a/Desktop/open-source/R/documentation/coin_change.html b/Desktop/open-source/R/documentation/coin_change.html
new file mode 100644
index 00000000..dc1670f4
--- /dev/null
+++ b/Desktop/open-source/R/documentation/coin_change.html
@@ -0,0 +1,223 @@
+<!-- Rendered HTML version of documentation/coin_change.md; content identical to the Markdown file added below. -->
diff --git a/Desktop/open-source/R/documentation/coin_change.md b/Desktop/open-source/R/documentation/coin_change.md
new file mode 100644
index 00000000..dab79cb5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/coin_change.md
@@ -0,0 +1,145 @@
+
+
+``` r
+# Coin Change Problem
+#
+# The Coin Change problem finds the minimum number of coins needed to make a certain amount
+# using a given set of coin denominations.
+#
+# Time Complexity: O(amount * n) where n = number of coin denominations
+# Space Complexity: O(amount)
+#
+# Applications:
+# - Currency and cash management
+# - Making change in vending machines or payment systems
+# - Dynamic resource allocation
+# - Minimum cost problems in algorithms
+
+# Function to compute minimum coins required
+coin_change <- function(coins, amount) {
+ #' Compute minimum number of coins needed to make the given amount
+ #' @param coins: Numeric vector of coin denominations
+ #' @param amount: Total amount to make
+ #' @return: Minimum number of coins required, or -1 if not possible
+
+ # Initialize DP array
+ dp <- rep(Inf, amount + 1)
+ dp[0 + 1] <- 0 # zero coins needed for amount 0
+
+ for (i in 1:amount) {
+ for (coin in coins) {
+ if (coin <= i) {
+ dp[i + 1] <- min(dp[i + 1], 1 + dp[i - coin + 1])
+ }
+ }
+ }
+
+ if (dp[amount + 1] == Inf) {
+ return(-1)
+ } else {
+ return(dp[amount + 1])
+ }
+}
+
+# Function to print the DP table (for educational purposes)
+print_coin_change_dp <- function(dp, amount) {
+ cat("DP Table for Coin Change:\n")
+ for (i in 0:amount) {
+ cat(sprintf("Amount %2d: %s\n", i, dp[i + 1]))
+ }
+ cat("\n")
+}
+
+
+# Example Usage & Testing
+cat("=== Coin Change Problem ===\n\n")
+```
+
+```
+## === Coin Change Problem ===
+```
+
+``` r
+# Test 1: Basic Example
+coins <- c(1, 2, 5)
+amount <- 11
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+```
+
+```
+## Coins: 1, 2, 5
+```
+
+``` r
+cat("Amount:", amount, "\n")
+```
+
+```
+## Amount: 11
+```
+
+``` r
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
+```
+
+```
+## Minimum Coins Needed: 3
+```
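+
+`print_coin_change_dp()` is defined above but never exercised; a hedged sketch (not in
+the original tests) rebuilds the DP vector for this first test case, reusing the `coins`
+and `amount` just set, since `coin_change()` itself only returns the final count.
+
+``` r
+# Rebuild the DP table for coins = {1, 2, 5}, amount = 11, then print it
+dp <- rep(Inf, amount + 1)
+dp[1] <- 0  # zero coins needed for amount 0
+for (i in 1:amount) {
+  for (coin in coins) {
+    if (coin <= i) {
+      dp[i + 1] <- min(dp[i + 1], 1 + dp[i - coin + 1])
+    }
+  }
+}
+print_coin_change_dp(dp, amount)
+```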
+
+``` r
+# Test 2: No solution case
+coins <- c(2, 4)
+amount <- 7
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+```
+
+```
+## Coins: 2, 4
+```
+
+``` r
+cat("Amount:", amount, "\n")
+```
+
+```
+## Amount: 7
+```
+
+``` r
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
+```
+
+```
+## Minimum Coins Needed: -1
+```
+
+``` r
+# Test 3: Larger dataset
+coins <- c(1, 3, 4, 5)
+amount <- 7
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+```
+
+```
+## Coins: 1, 3, 4, 5
+```
+
+``` r
+cat("Amount:", amount, "\n")
+```
+
+```
+## Amount: 7
+```
+
+``` r
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
+```
+
+```
+## Minimum Coins Needed: 2
+```
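+
+One more hedged example (not in the original file): denominations {1, 3, 4} with amount 6
+are a classic case where a greedy largest-coin-first strategy would give 4 + 1 + 1 (three
+coins), while the DP solution should find 3 + 3 (two coins).
+
+``` r
+# Expected minimum: 2 (i.e. 3 + 3), which a greedy approach would miss
+cat("Minimum Coins Needed:", coin_change(c(1, 3, 4), 6), "\n")
+```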
+
diff --git a/Desktop/open-source/R/documentation/data_normalization_standardization.html b/Desktop/open-source/R/documentation/data_normalization_standardization.html
new file mode 100644
index 00000000..7c8fb1fa
--- /dev/null
+++ b/Desktop/open-source/R/documentation/data_normalization_standardization.html
@@ -0,0 +1,494 @@
+<!-- Rendered HTML version of documentation/data_normalization_standardization.md; content identical to the Markdown file added below. -->
diff --git a/Desktop/open-source/R/documentation/data_normalization_standardization.md b/Desktop/open-source/R/documentation/data_normalization_standardization.md
new file mode 100644
index 00000000..b9de33e1
--- /dev/null
+++ b/Desktop/open-source/R/documentation/data_normalization_standardization.md
@@ -0,0 +1,394 @@
+
+
+``` r
+# normalization & standardization
+normalization<-function(x){
+ return((x-min(x))/(max(x)-min(x)))
+}
+
+standardization<-function(x){
+ return((x-mean(x))/sd(x))
+}
+
+head(iris)
+```
+
+```
+## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+## 1 5.1 3.5 1.4 0.2 setosa
+## 2 4.9 3.0 1.4 0.2 setosa
+## 3 4.7 3.2 1.3 0.2 setosa
+## 4 4.6 3.1 1.5 0.2 setosa
+## 5 5.0 3.6 1.4 0.2 setosa
+## 6 5.4 3.9 1.7 0.4 setosa
+```
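+
+A quick hedged illustration on a small vector (not part of the original script): min-max
+normalization should map the values onto [0, 1], while standardization should centre them
+at 0 with unit standard deviation.
+
+``` r
+normalization(c(2, 4, 6, 8, 10))   # expected: 0.00 0.25 0.50 0.75 1.00
+standardization(1:5)               # expected: approx -1.26 -0.63 0.00 0.63 1.26
+```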
+
+``` r
+# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+# 1 5.1 3.5 1.4 0.2 setosa
+# 2 4.9 3.0 1.4 0.2 setosa
+# 3 4.7 3.2 1.3 0.2 setosa
+# 4 4.6 3.1 1.5 0.2 setosa
+# 5 5.0 3.6 1.4 0.2 setosa
+# 6 5.4 3.9 1.7 0.4 setosa
+
+iris<-iris[,-5]
+head(iris)
+```
+
+```
+## Sepal.Length Sepal.Width Petal.Length Petal.Width
+## 1 5.1 3.5 1.4 0.2
+## 2 4.9 3.0 1.4 0.2
+## 3 4.7 3.2 1.3 0.2
+## 4 4.6 3.1 1.5 0.2
+## 5 5.0 3.6 1.4 0.2
+## 6 5.4 3.9 1.7 0.4
+```
+
+``` r
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# 1 5.1 3.5 1.4 0.2
+# 2 4.9 3.0 1.4 0.2
+# 3 4.7 3.2 1.3 0.2
+# 4 4.6 3.1 1.5 0.2
+# 5 5.0 3.6 1.4 0.2
+# 6 5.4 3.9 1.7 0.4
+
+#normalize
+apply(as.matrix(iris),2,normalization)
+```
+
+```
+## Sepal.Length Sepal.Width Petal.Length Petal.Width
+## [1,] 0.22222222 0.62500000 0.06779661 0.04166667
+## [2,] 0.16666667 0.41666667 0.06779661 0.04166667
+## [3,] 0.11111111 0.50000000 0.05084746 0.04166667
+## [4,] 0.08333333 0.45833333 0.08474576 0.04166667
+## [5,] 0.19444444 0.66666667 0.06779661 0.04166667
+## [6,] 0.30555556 0.79166667 0.11864407 0.12500000
+## [7,] 0.08333333 0.58333333 0.06779661 0.08333333
+## [8,] 0.19444444 0.58333333 0.08474576 0.04166667
+## [9,] 0.02777778 0.37500000 0.06779661 0.04166667
+## [10,] 0.16666667 0.45833333 0.08474576 0.00000000
+## [11,] 0.30555556 0.70833333 0.08474576 0.04166667
+## [12,] 0.13888889 0.58333333 0.10169492 0.04166667
+## [13,] 0.13888889 0.41666667 0.06779661 0.00000000
+## [14,] 0.00000000 0.41666667 0.01694915 0.00000000
+## [15,] 0.41666667 0.83333333 0.03389831 0.04166667
+## [16,] 0.38888889 1.00000000 0.08474576 0.12500000
+## [17,] 0.30555556 0.79166667 0.05084746 0.12500000
+## [18,] 0.22222222 0.62500000 0.06779661 0.08333333
+## [19,] 0.38888889 0.75000000 0.11864407 0.08333333
+## [20,] 0.22222222 0.75000000 0.08474576 0.08333333
+## [21,] 0.30555556 0.58333333 0.11864407 0.04166667
+## [22,] 0.22222222 0.70833333 0.08474576 0.12500000
+## [23,] 0.08333333 0.66666667 0.00000000 0.04166667
+## [24,] 0.22222222 0.54166667 0.11864407 0.16666667
+## [25,] 0.13888889 0.58333333 0.15254237 0.04166667
+## [26,] 0.19444444 0.41666667 0.10169492 0.04166667
+## [27,] 0.19444444 0.58333333 0.10169492 0.12500000
+## [28,] 0.25000000 0.62500000 0.08474576 0.04166667
+## [29,] 0.25000000 0.58333333 0.06779661 0.04166667
+## [30,] 0.11111111 0.50000000 0.10169492 0.04166667
+## [31,] 0.13888889 0.45833333 0.10169492 0.04166667
+## [32,] 0.30555556 0.58333333 0.08474576 0.12500000
+## [33,] 0.25000000 0.87500000 0.08474576 0.00000000
+## [34,] 0.33333333 0.91666667 0.06779661 0.04166667
+## [35,] 0.16666667 0.45833333 0.08474576 0.04166667
+## [36,] 0.19444444 0.50000000 0.03389831 0.04166667
+## [37,] 0.33333333 0.62500000 0.05084746 0.04166667
+## [38,] 0.16666667 0.66666667 0.06779661 0.00000000
+## [39,] 0.02777778 0.41666667 0.05084746 0.04166667
+## [40,] 0.22222222 0.58333333 0.08474576 0.04166667
+## [41,] 0.19444444 0.62500000 0.05084746 0.08333333
+## [42,] 0.05555556 0.12500000 0.05084746 0.08333333
+## [43,] 0.02777778 0.50000000 0.05084746 0.04166667
+## [44,] 0.19444444 0.62500000 0.10169492 0.20833333
+## [45,] 0.22222222 0.75000000 0.15254237 0.12500000
+## [46,] 0.13888889 0.41666667 0.06779661 0.08333333
+## [47,] 0.22222222 0.75000000 0.10169492 0.04166667
+## [48,] 0.08333333 0.50000000 0.06779661 0.04166667
+## [49,] 0.27777778 0.70833333 0.08474576 0.04166667
+## [50,] 0.19444444 0.54166667 0.06779661 0.04166667
+## [51,] 0.75000000 0.50000000 0.62711864 0.54166667
+## [52,] 0.58333333 0.50000000 0.59322034 0.58333333
+## [53,] 0.72222222 0.45833333 0.66101695 0.58333333
+## [54,] 0.33333333 0.12500000 0.50847458 0.50000000
+## [55,] 0.61111111 0.33333333 0.61016949 0.58333333
+## [56,] 0.38888889 0.33333333 0.59322034 0.50000000
+## [57,] 0.55555556 0.54166667 0.62711864 0.62500000
+## [58,] 0.16666667 0.16666667 0.38983051 0.37500000
+## [59,] 0.63888889 0.37500000 0.61016949 0.50000000
+## [60,] 0.25000000 0.29166667 0.49152542 0.54166667
+## [61,] 0.19444444 0.00000000 0.42372881 0.37500000
+## [62,] 0.44444444 0.41666667 0.54237288 0.58333333
+## [63,] 0.47222222 0.08333333 0.50847458 0.37500000
+## [64,] 0.50000000 0.37500000 0.62711864 0.54166667
+## [65,] 0.36111111 0.37500000 0.44067797 0.50000000
+## [66,] 0.66666667 0.45833333 0.57627119 0.54166667
+## [67,] 0.36111111 0.41666667 0.59322034 0.58333333
+## [68,] 0.41666667 0.29166667 0.52542373 0.37500000
+## [69,] 0.52777778 0.08333333 0.59322034 0.58333333
+## [70,] 0.36111111 0.20833333 0.49152542 0.41666667
+## [71,] 0.44444444 0.50000000 0.64406780 0.70833333
+## [72,] 0.50000000 0.33333333 0.50847458 0.50000000
+## [73,] 0.55555556 0.20833333 0.66101695 0.58333333
+## [74,] 0.50000000 0.33333333 0.62711864 0.45833333
+## [75,] 0.58333333 0.37500000 0.55932203 0.50000000
+## [76,] 0.63888889 0.41666667 0.57627119 0.54166667
+## [77,] 0.69444444 0.33333333 0.64406780 0.54166667
+## [78,] 0.66666667 0.41666667 0.67796610 0.66666667
+## [79,] 0.47222222 0.37500000 0.59322034 0.58333333
+## [80,] 0.38888889 0.25000000 0.42372881 0.37500000
+## [81,] 0.33333333 0.16666667 0.47457627 0.41666667
+## [82,] 0.33333333 0.16666667 0.45762712 0.37500000
+## [83,] 0.41666667 0.29166667 0.49152542 0.45833333
+## [84,] 0.47222222 0.29166667 0.69491525 0.62500000
+## [85,] 0.30555556 0.41666667 0.59322034 0.58333333
+## [86,] 0.47222222 0.58333333 0.59322034 0.62500000
+## [87,] 0.66666667 0.45833333 0.62711864 0.58333333
+## [88,] 0.55555556 0.12500000 0.57627119 0.50000000
+## [89,] 0.36111111 0.41666667 0.52542373 0.50000000
+## [90,] 0.33333333 0.20833333 0.50847458 0.50000000
+## [91,] 0.33333333 0.25000000 0.57627119 0.45833333
+## [92,] 0.50000000 0.41666667 0.61016949 0.54166667
+## [93,] 0.41666667 0.25000000 0.50847458 0.45833333
+## [94,] 0.19444444 0.12500000 0.38983051 0.37500000
+## [95,] 0.36111111 0.29166667 0.54237288 0.50000000
+## [96,] 0.38888889 0.41666667 0.54237288 0.45833333
+## [97,] 0.38888889 0.37500000 0.54237288 0.50000000
+## [98,] 0.52777778 0.37500000 0.55932203 0.50000000
+## [99,] 0.22222222 0.20833333 0.33898305 0.41666667
+## [100,] 0.38888889 0.33333333 0.52542373 0.50000000
+## [101,] 0.55555556 0.54166667 0.84745763 1.00000000
+## [102,] 0.41666667 0.29166667 0.69491525 0.75000000
+## [103,] 0.77777778 0.41666667 0.83050847 0.83333333
+## [104,] 0.55555556 0.37500000 0.77966102 0.70833333
+## [105,] 0.61111111 0.41666667 0.81355932 0.87500000
+## [106,] 0.91666667 0.41666667 0.94915254 0.83333333
+## [107,] 0.16666667 0.20833333 0.59322034 0.66666667
+## [108,] 0.83333333 0.37500000 0.89830508 0.70833333
+## [109,] 0.66666667 0.20833333 0.81355932 0.70833333
+## [110,] 0.80555556 0.66666667 0.86440678 1.00000000
+## [111,] 0.61111111 0.50000000 0.69491525 0.79166667
+## [112,] 0.58333333 0.29166667 0.72881356 0.75000000
+## [113,] 0.69444444 0.41666667 0.76271186 0.83333333
+## [114,] 0.38888889 0.20833333 0.67796610 0.79166667
+## [115,] 0.41666667 0.33333333 0.69491525 0.95833333
+## [116,] 0.58333333 0.50000000 0.72881356 0.91666667
+## [117,] 0.61111111 0.41666667 0.76271186 0.70833333
+## [118,] 0.94444444 0.75000000 0.96610169 0.87500000
+## [119,] 0.94444444 0.25000000 1.00000000 0.91666667
+## [120,] 0.47222222 0.08333333 0.67796610 0.58333333
+## [121,] 0.72222222 0.50000000 0.79661017 0.91666667
+## [122,] 0.36111111 0.33333333 0.66101695 0.79166667
+## [123,] 0.94444444 0.33333333 0.96610169 0.79166667
+## [124,] 0.55555556 0.29166667 0.66101695 0.70833333
+## [125,] 0.66666667 0.54166667 0.79661017 0.83333333
+## [126,] 0.80555556 0.50000000 0.84745763 0.70833333
+## [127,] 0.52777778 0.33333333 0.64406780 0.70833333
+## [128,] 0.50000000 0.41666667 0.66101695 0.70833333
+## [129,] 0.58333333 0.33333333 0.77966102 0.83333333
+## [130,] 0.80555556 0.41666667 0.81355932 0.62500000
+## [131,] 0.86111111 0.33333333 0.86440678 0.75000000
+## [132,] 1.00000000 0.75000000 0.91525424 0.79166667
+## [133,] 0.58333333 0.33333333 0.77966102 0.87500000
+## [134,] 0.55555556 0.33333333 0.69491525 0.58333333
+## [135,] 0.50000000 0.25000000 0.77966102 0.54166667
+## [136,] 0.94444444 0.41666667 0.86440678 0.91666667
+## [137,] 0.55555556 0.58333333 0.77966102 0.95833333
+## [138,] 0.58333333 0.45833333 0.76271186 0.70833333
+## [139,] 0.47222222 0.41666667 0.64406780 0.70833333
+## [140,] 0.72222222 0.45833333 0.74576271 0.83333333
+## [141,] 0.66666667 0.45833333 0.77966102 0.95833333
+## [142,] 0.72222222 0.45833333 0.69491525 0.91666667
+## [143,] 0.41666667 0.29166667 0.69491525 0.75000000
+## [144,] 0.69444444 0.50000000 0.83050847 0.91666667
+## [145,] 0.66666667 0.54166667 0.79661017 1.00000000
+## [146,] 0.66666667 0.41666667 0.71186441 0.91666667
+## [147,] 0.55555556 0.20833333 0.67796610 0.75000000
+## [148,] 0.61111111 0.41666667 0.71186441 0.79166667
+## [149,] 0.52777778 0.58333333 0.74576271 0.91666667
+## [150,] 0.44444444 0.41666667 0.69491525 0.70833333
+```
+
+``` r
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# [1,] 0.22222222 0.62500000 0.06779661 0.04166667
+# [2,] 0.16666667 0.41666667 0.06779661 0.04166667
+# [3,] 0.11111111 0.50000000 0.05084746 0.04166667
+# [4,] 0.08333333 0.45833333 0.08474576 0.04166667
+# [5,] 0.19444444 0.66666667 0.06779661 0.04166667
+# [6,] 0.30555556 0.79166667 0.11864407 0.12500000
+# [7,] 0.08333333 0.58333333 0.06779661 0.08333333
+
+#standardize
+apply(as.matrix(iris),2,standardization)
+```
+
+```
+## Sepal.Length Sepal.Width Petal.Length Petal.Width
+## [1,] -0.89767388 1.01560199 -1.33575163 -1.3110521482
+## [2,] -1.13920048 -0.13153881 -1.33575163 -1.3110521482
+## [3,] -1.38072709 0.32731751 -1.39239929 -1.3110521482
+## [4,] -1.50149039 0.09788935 -1.27910398 -1.3110521482
+## [5,] -1.01843718 1.24503015 -1.33575163 -1.3110521482
+## [6,] -0.53538397 1.93331463 -1.16580868 -1.0486667950
+## [7,] -1.50149039 0.78617383 -1.33575163 -1.1798594716
+## [8,] -1.01843718 0.78617383 -1.27910398 -1.3110521482
+## [9,] -1.74301699 -0.36096697 -1.33575163 -1.3110521482
+## [10,] -1.13920048 0.09788935 -1.27910398 -1.4422448248
+## [11,] -0.53538397 1.47445831 -1.27910398 -1.3110521482
+## [12,] -1.25996379 0.78617383 -1.22245633 -1.3110521482
+## [13,] -1.25996379 -0.13153881 -1.33575163 -1.4422448248
+## [14,] -1.86378030 -0.13153881 -1.50569459 -1.4422448248
+## [15,] -0.05233076 2.16274279 -1.44904694 -1.3110521482
+## [16,] -0.17309407 3.08045544 -1.27910398 -1.0486667950
+## [17,] -0.53538397 1.93331463 -1.39239929 -1.0486667950
+## [18,] -0.89767388 1.01560199 -1.33575163 -1.1798594716
+## [19,] -0.17309407 1.70388647 -1.16580868 -1.1798594716
+## [20,] -0.89767388 1.70388647 -1.27910398 -1.1798594716
+## [21,] -0.53538397 0.78617383 -1.16580868 -1.3110521482
+## [22,] -0.89767388 1.47445831 -1.27910398 -1.0486667950
+## [23,] -1.50149039 1.24503015 -1.56234224 -1.3110521482
+## [24,] -0.89767388 0.55674567 -1.16580868 -0.9174741184
+## [25,] -1.25996379 0.78617383 -1.05251337 -1.3110521482
+## [26,] -1.01843718 -0.13153881 -1.22245633 -1.3110521482
+## [27,] -1.01843718 0.78617383 -1.22245633 -1.0486667950
+## [28,] -0.77691058 1.01560199 -1.27910398 -1.3110521482
+## [29,] -0.77691058 0.78617383 -1.33575163 -1.3110521482
+## [30,] -1.38072709 0.32731751 -1.22245633 -1.3110521482
+## [31,] -1.25996379 0.09788935 -1.22245633 -1.3110521482
+## [32,] -0.53538397 0.78617383 -1.27910398 -1.0486667950
+## [33,] -0.77691058 2.39217095 -1.27910398 -1.4422448248
+## [34,] -0.41462067 2.62159911 -1.33575163 -1.3110521482
+## [35,] -1.13920048 0.09788935 -1.27910398 -1.3110521482
+## [36,] -1.01843718 0.32731751 -1.44904694 -1.3110521482
+## [37,] -0.41462067 1.01560199 -1.39239929 -1.3110521482
+## [38,] -1.13920048 1.24503015 -1.33575163 -1.4422448248
+## [39,] -1.74301699 -0.13153881 -1.39239929 -1.3110521482
+## [40,] -0.89767388 0.78617383 -1.27910398 -1.3110521482
+## [41,] -1.01843718 1.01560199 -1.39239929 -1.1798594716
+## [42,] -1.62225369 -1.73753594 -1.39239929 -1.1798594716
+## [43,] -1.74301699 0.32731751 -1.39239929 -1.3110521482
+## [44,] -1.01843718 1.01560199 -1.22245633 -0.7862814418
+## [45,] -0.89767388 1.70388647 -1.05251337 -1.0486667950
+## [46,] -1.25996379 -0.13153881 -1.33575163 -1.1798594716
+## [47,] -0.89767388 1.70388647 -1.22245633 -1.3110521482
+## [48,] -1.50149039 0.32731751 -1.33575163 -1.3110521482
+## [49,] -0.65614727 1.47445831 -1.27910398 -1.3110521482
+## [50,] -1.01843718 0.55674567 -1.33575163 -1.3110521482
+## [51,] 1.39682886 0.32731751 0.53362088 0.2632599711
+## [52,] 0.67224905 0.32731751 0.42032558 0.3944526477
+## [53,] 1.27606556 0.09788935 0.64691619 0.3944526477
+## [54,] -0.41462067 -1.73753594 0.13708732 0.1320672944
+## [55,] 0.79301235 -0.59039513 0.47697323 0.3944526477
+## [56,] -0.17309407 -0.59039513 0.42032558 0.1320672944
+## [57,] 0.55148575 0.55674567 0.53362088 0.5256453243
+## [58,] -1.13920048 -1.50810778 -0.25944625 -0.2615107354
+## [59,] 0.91377565 -0.36096697 0.47697323 0.1320672944
+## [60,] -0.77691058 -0.81982329 0.08043967 0.2632599711
+## [61,] -1.01843718 -2.42582042 -0.14615094 -0.2615107354
+## [62,] 0.06843254 -0.13153881 0.25038262 0.3944526477
+## [63,] 0.18919584 -1.96696410 0.13708732 -0.2615107354
+## [64,] 0.30995914 -0.36096697 0.53362088 0.2632599711
+## [65,] -0.29385737 -0.36096697 -0.08950329 0.1320672944
+## [66,] 1.03453895 0.09788935 0.36367793 0.2632599711
+## [67,] -0.29385737 -0.13153881 0.42032558 0.3944526477
+## [68,] -0.05233076 -0.81982329 0.19373497 -0.2615107354
+## [69,] 0.43072244 -1.96696410 0.42032558 0.3944526477
+## [70,] -0.29385737 -1.27867961 0.08043967 -0.1303180588
+## [71,] 0.06843254 0.32731751 0.59026853 0.7880306775
+## [72,] 0.30995914 -0.59039513 0.13708732 0.1320672944
+## [73,] 0.55148575 -1.27867961 0.64691619 0.3944526477
+## [74,] 0.30995914 -0.59039513 0.53362088 0.0008746178
+## [75,] 0.67224905 -0.36096697 0.30703027 0.1320672944
+## [76,] 0.91377565 -0.13153881 0.36367793 0.2632599711
+## [77,] 1.15530226 -0.59039513 0.59026853 0.2632599711
+## [78,] 1.03453895 -0.13153881 0.70356384 0.6568380009
+## [79,] 0.18919584 -0.36096697 0.42032558 0.3944526477
+## [80,] -0.17309407 -1.04925145 -0.14615094 -0.2615107354
+## [81,] -0.41462067 -1.50810778 0.02379201 -0.1303180588
+## [82,] -0.41462067 -1.50810778 -0.03285564 -0.2615107354
+## [83,] -0.05233076 -0.81982329 0.08043967 0.0008746178
+## [84,] 0.18919584 -0.81982329 0.76021149 0.5256453243
+## [85,] -0.53538397 -0.13153881 0.42032558 0.3944526477
+## [86,] 0.18919584 0.78617383 0.42032558 0.5256453243
+## [87,] 1.03453895 0.09788935 0.53362088 0.3944526477
+## [88,] 0.55148575 -1.73753594 0.36367793 0.1320672944
+## [89,] -0.29385737 -0.13153881 0.19373497 0.1320672944
+## [90,] -0.41462067 -1.27867961 0.13708732 0.1320672944
+## [91,] -0.41462067 -1.04925145 0.36367793 0.0008746178
+## [92,] 0.30995914 -0.13153881 0.47697323 0.2632599711
+## [93,] -0.05233076 -1.04925145 0.13708732 0.0008746178
+## [94,] -1.01843718 -1.73753594 -0.25944625 -0.2615107354
+## [95,] -0.29385737 -0.81982329 0.25038262 0.1320672944
+## [96,] -0.17309407 -0.13153881 0.25038262 0.0008746178
+## [97,] -0.17309407 -0.36096697 0.25038262 0.1320672944
+## [98,] 0.43072244 -0.36096697 0.30703027 0.1320672944
+## [99,] -0.89767388 -1.27867961 -0.42938920 -0.1303180588
+## [100,] -0.17309407 -0.59039513 0.19373497 0.1320672944
+## [101,] 0.55148575 0.55674567 1.27004036 1.7063794137
+## [102,] -0.05233076 -0.81982329 0.76021149 0.9192233541
+## [103,] 1.51759216 -0.13153881 1.21339271 1.1816087073
+## [104,] 0.55148575 -0.36096697 1.04344975 0.7880306775
+## [105,] 0.79301235 -0.13153881 1.15674505 1.3128013839
+## [106,] 2.12140867 -0.13153881 1.60992627 1.1816087073
+## [107,] -1.13920048 -1.27867961 0.42032558 0.6568380009
+## [108,] 1.75911877 -0.36096697 1.43998331 0.7880306775
+## [109,] 1.03453895 -1.27867961 1.15674505 0.7880306775
+## [110,] 1.63835547 1.24503015 1.32668801 1.7063794137
+## [111,] 0.79301235 0.32731751 0.76021149 1.0504160307
+## [112,] 0.67224905 -0.81982329 0.87350679 0.9192233541
+## [113,] 1.15530226 -0.13153881 0.98680210 1.1816087073
+## [114,] -0.17309407 -1.27867961 0.70356384 1.0504160307
+## [115,] -0.05233076 -0.59039513 0.76021149 1.5751867371
+## [116,] 0.67224905 0.32731751 0.87350679 1.4439940605
+## [117,] 0.79301235 -0.13153881 0.98680210 0.7880306775
+## [118,] 2.24217198 1.70388647 1.66657392 1.3128013839
+## [119,] 2.24217198 -1.04925145 1.77986923 1.4439940605
+## [120,] 0.18919584 -1.96696410 0.70356384 0.3944526477
+## [121,] 1.27606556 0.32731751 1.10009740 1.4439940605
+## [122,] -0.29385737 -0.59039513 0.64691619 1.0504160307
+## [123,] 2.24217198 -0.59039513 1.66657392 1.0504160307
+## [124,] 0.55148575 -0.81982329 0.64691619 0.7880306775
+## [125,] 1.03453895 0.55674567 1.10009740 1.1816087073
+## [126,] 1.63835547 0.32731751 1.27004036 0.7880306775
+## [127,] 0.43072244 -0.59039513 0.59026853 0.7880306775
+## [128,] 0.30995914 -0.13153881 0.64691619 0.7880306775
+## [129,] 0.67224905 -0.59039513 1.04344975 1.1816087073
+## [130,] 1.63835547 -0.13153881 1.15674505 0.5256453243
+## [131,] 1.87988207 -0.59039513 1.32668801 0.9192233541
+## [132,] 2.48369858 1.70388647 1.49663097 1.0504160307
+## [133,] 0.67224905 -0.59039513 1.04344975 1.3128013839
+## [134,] 0.55148575 -0.59039513 0.76021149 0.3944526477
+## [135,] 0.30995914 -1.04925145 1.04344975 0.2632599711
+## [136,] 2.24217198 -0.13153881 1.32668801 1.4439940605
+## [137,] 0.55148575 0.78617383 1.04344975 1.5751867371
+## [138,] 0.67224905 0.09788935 0.98680210 0.7880306775
+## [139,] 0.18919584 -0.13153881 0.59026853 0.7880306775
+## [140,] 1.27606556 0.09788935 0.93015445 1.1816087073
+## [141,] 1.03453895 0.09788935 1.04344975 1.5751867371
+## [142,] 1.27606556 0.09788935 0.76021149 1.4439940605
+## [143,] -0.05233076 -0.81982329 0.76021149 0.9192233541
+## [144,] 1.15530226 0.32731751 1.21339271 1.4439940605
+## [145,] 1.03453895 0.55674567 1.10009740 1.7063794137
+## [146,] 1.03453895 -0.13153881 0.81685914 1.4439940605
+## [147,] 0.55148575 -1.27867961 0.70356384 0.9192233541
+## [148,] 0.79301235 -0.13153881 0.81685914 1.0504160307
+## [149,] 0.43072244 0.78617383 0.93015445 1.4439940605
+## [150,] 0.06843254 -0.13153881 0.76021149 0.7880306775
+```
+
+``` r
+# Sepal.Length Sepal.Width Petal.Length Petal.Width
+# [1,] -0.89767388 1.01560199 -1.33575163 -1.3110521482
+# [2,] -1.13920048 -0.13153881 -1.33575163 -1.3110521482
+# [3,] -1.38072709 0.32731751 -1.39239929 -1.3110521482
+# [4,] -1.50149039 0.09788935 -1.27910398 -1.3110521482
+# [5,] -1.01843718 1.24503015 -1.33575163 -1.3110521482
+# [6,] -0.53538397 1.93331463 -1.16580868 -1.0486667950
+# [7,] -1.50149039 0.78617383 -1.33575163 -1.1798594716
+```
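+
+A hedged sanity check (not in the original script): after standardization every column
+should have mean approximately 0 and standard deviation 1, and after normalization every
+column should lie in [0, 1].
+
+``` r
+iris.std <- apply(as.matrix(iris), 2, standardization)
+round(colMeans(iris.std), 10)                      # expected: ~0 for every column
+apply(iris.std, 2, sd)                             # expected: 1 for every column
+range(apply(as.matrix(iris), 2, normalization))    # expected: 0 1
+```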
+
diff --git a/Desktop/open-source/R/documentation/data_processing.html b/Desktop/open-source/R/documentation/data_processing.html
new file mode 100644
index 00000000..2d033dc7
--- /dev/null
+++ b/Desktop/open-source/R/documentation/data_processing.html
@@ -0,0 +1,358 @@
+<!-- Rendered HTML version of documentation/data_processing.md; content identical to the Markdown file added below. -->
diff --git a/Desktop/open-source/R/documentation/data_processing.md b/Desktop/open-source/R/documentation/data_processing.md
new file mode 100644
index 00000000..458d1251
--- /dev/null
+++ b/Desktop/open-source/R/documentation/data_processing.md
@@ -0,0 +1,338 @@
+
+
+``` r
+library(xlsx)
+```
+
+```
+## Error in library(xlsx): there is no package called 'xlsx'
+```
+
+``` r
+## Loading required package: rJava
+## Loading required package: xlsxjars
+
+setwd("/Users/chenfeiyang")
+```
+
+```
+## Error in setwd("/Users/chenfeiyang"): cannot change working directory
+```
+
+``` r
+cameraData <- read.xlsx("./data/cameras.xlsx", sheetIndex = 1, header = TRUE)
+```
+
+```
+## Error in read.xlsx("./data/cameras.xlsx", sheetIndex = 1, header = TRUE): could not find function "read.xlsx"
+```
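+
+The `read.xlsx()` calls fail here only because the `xlsx` package (which needs
+Java via `rJava`) is not installed in this environment. As a sketch of a
+Java-free alternative (assuming the `readxl` package and the same
+`./data/cameras.xlsx` file), the same sheet could be read with:
+
+``` r
+# Hypothetical alternative using readxl (not evaluated in this document)
+library(readxl)
+cameraData <- read_excel("./data/cameras.xlsx", sheet = 1)
+head(cameraData)
+```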
+
+``` r
+cameraData <- read.xlsx("./data/cameras.xlsx", "Baltimore Fixed Speed Cameras",
+ header = TRUE)
+```
+
+```
+## Error in read.xlsx("./data/cameras.xlsx", "Baltimore Fixed Speed Cameras", : could not find function "read.xlsx"
+```
+
+``` r
+head(cameraData)
+```
+
+```
+## Error in head(cameraData): object 'cameraData' not found
+```
+
+``` r
+## address direction street crossStreet
+## 1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
+## 2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
+## 3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
+## 4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
+## 5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
+## 6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
+## intersection Location.1
+## 1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
+## 2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
+## 3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
+## 4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
+## 5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
+## 6 Erdman & Macon St (39.3068045671, -76.5593167803)
+
+# Read specific rows and columns in Excel
+colIndex <- 2:3
+rowIndex <- 1:4
+cameraDataSubset <- read.xlsx("./data/cameras.xlsx", sheetIndex = 1, colIndex = colIndex,
+ rowIndex = rowIndex)
+```
+
+```
+## Error in read.xlsx("./data/cameras.xlsx", sheetIndex = 1, colIndex = colIndex, : could not find function "read.xlsx"
+```
+
+``` r
+cameraDataSubset
+```
+
+```
+## Error: object 'cameraDataSubset' not found
+```
+
+``` r
+## direction street
+## 1 N/B Caton Ave
+## 2 S/B Caton Ave
+## 3 E/B Wilkens Ave
+
+# Subsetting - quick review
+set.seed(13435)
+X <- data.frame(var1 = sample(1:5), var2 = sample(6:10), var3 = sample(11:15))
+X <- X[sample(1:5), ]
+X$var2[c(1, 3)] = NA
+X
+```
+
+```
+## var1 var2 var3
+## 5 2 NA 11
+## 4 4 10 12
+## 1 3 NA 14
+## 2 1 7 15
+## 3 5 6 13
+```
+
+``` r
+## var1 var2 var3
+## 1 2 NA 15
+## 4 1 10 11
+## 2 3 NA 12
+## 3 5 6 14
+## 5 4 9 13
+
+X[, 1]
+```
+
+```
+## [1] 2 4 3 1 5
+```
+
+``` r
+## [1] 2 1 3 5 4
+X[, "var1"]
+```
+
+```
+## [1] 2 4 3 1 5
+```
+
+``` r
+## [1] 2 1 3 5 4
+X[1:2, "var2"]
+```
+
+```
+## [1] NA 10
+```
+
+``` r
+## [1] NA 10
+
+# Logicals and: & , or: |
+X[(X$var1 <= 3 & X$var3 > 11), ]
+```
+
+```
+## var1 var2 var3
+## 1 3 NA 14
+## 2 1 7 15
+```
+
+``` r
+## var1 var2 var3
+## 1 2 NA 15
+## 2 3 NA 12
+X[(X$var1 <= 3 | X$var3 > 15), ]
+```
+
+```
+## var1 var2 var3
+## 5 2 NA 11
+## 1 3 NA 14
+## 2 1 7 15
+```
+
+``` r
+## var1 var2 var3
+## 1 2 NA 15
+## 4 1 10 11
+## 2 3 NA 12
+
+## Dealing with missing values
+X[which(X$var2 > 8), ]
+```
+
+```
+## var1 var2 var3
+## 4 4 10 12
+```
+
+``` r
+## var1 var2 var3
+## 4 1 10 11
+## 5 4 9 13
+
+# Sorting
+sort(X$var1)
+```
+
+```
+## [1] 1 2 3 4 5
+```
+
+``` r
+## [1] 1 2 3 4 5
+sort(X$var1, decreasing = TRUE)
+```
+
+```
+## [1] 5 4 3 2 1
+```
+
+``` r
+## [1] 5 4 3 2 1
+sort(X$var2, na.last = TRUE)
+```
+
+```
+## [1] 6 7 10 NA NA
+```
+
+``` r
+## [1] 6 9 10 NA NA
+
+# Ordering
+X[order(X$var1), ]
+```
+
+```
+## var1 var2 var3
+## 2 1 7 15
+## 5 2 NA 11
+## 1 3 NA 14
+## 4 4 10 12
+## 3 5 6 13
+```
+
+``` r
+## var1 var2 var3
+## 4 1 10 11
+## 1 2 NA 15
+## 2 3 NA 12
+## 5 4 9 13
+## 3 5 6 14
+
+X[order(X$var1, X$var3), ]
+```
+
+```
+## var1 var2 var3
+## 2 1 7 15
+## 5 2 NA 11
+## 1 3 NA 14
+## 4 4 10 12
+## 3 5 6 13
+```
+
+``` r
+## var1 var2 var3
+## 4 1 10 11
+## 1 2 NA 15
+## 2 3 NA 12
+## 5 4 9 13
+## 3 5 6 14
+
+## Sort using the arrange function of the plyr package
+
+library(plyr)
+```
+
+```
+## Error in library(plyr): there is no package called 'plyr'
+```
+
+``` r
+arrange(X, var1)
+```
+
+```
+## Error in arrange(X, var1): could not find function "arrange"
+```
+
+``` r
+## var1 var2 var3
+## 1 1 10 11
+## 2 2 NA 15
+## 3 3 NA 12
+## 4 4 9 13
+## 5 5 6 14
+
+arrange(X, desc(var1))
+```
+
+```
+## Error in arrange(X, desc(var1)): could not find function "arrange"
+```
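+
+`plyr` is not installed in this environment, which is why both `arrange()`
+calls error. As a sketch, the same ordering can be written with the `dplyr`
+package (assumed available), whose `arrange()` takes the same arguments:
+
+``` r
+# dplyr equivalent of the plyr calls above (not evaluated in this document)
+library(dplyr)
+arrange(X, var1)          # ascending by var1
+arrange(X, desc(var1))    # descending by var1
+```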
+
+``` r
+## var1 var2 var3
+## 1 5 6 14
+## 2 4 9 13
+## 3 3 NA 12
+## 4 2 NA 15
+## 5 1 10 11
+
+# Add row and column
+X$var4 <- rnorm(5)
+X
+```
+
+```
+## var1 var2 var3 var4
+## 5 2 NA 11 -0.4150458
+## 4 4 10 12 2.5437602
+## 1 3 NA 14 1.5545298
+## 2 1 7 15 -0.6192328
+## 3 5 6 13 -0.9261035
+```
+
+``` r
+## var1 var2 var3 var4
+## 1 2 NA 15 0.18760
+## 4 1 10 11 1.78698
+## 2 3 NA 12 0.49669
+## 3 5 6 14 0.06318
+## 5 4 9 13 -0.53613
+
+Y <- cbind(X, rnorm(5))
+Y
+```
+
+```
+## var1 var2 var3 var4 rnorm(5)
+## 5 2 NA 11 -0.4150458 -0.66549949
+## 4 4 10 12 2.5437602 -0.02166735
+## 1 3 NA 14 1.5545298 -0.17411953
+## 2 1 7 15 -0.6192328 0.23900438
+## 3 5 6 13 -0.9261035 -1.83245959
+```
+
+``` r
+## var1 var2 var3 var4 rnorm(5)
+## 1 2 NA 15 0.18760 0.62578
+## 4 1 10 11 1.78698 -2.45084
+## 2 3 NA 12 0.49669 0.08909
+## 3 5 6 14 0.06318 0.47839
+## 5 4 9 13 -0.53613 1.00053
+```
+
diff --git a/Desktop/open-source/R/documentation/dbscan_clustering.html b/Desktop/open-source/R/documentation/dbscan_clustering.html
new file mode 100644
index 00000000..c34e6083
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dbscan_clustering.html
@@ -0,0 +1,132 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/dbscan_clustering.md below]
diff --git a/Desktop/open-source/R/documentation/dbscan_clustering.md b/Desktop/open-source/R/documentation/dbscan_clustering.md
new file mode 100644
index 00000000..a082d545
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dbscan_clustering.md
@@ -0,0 +1,26 @@
+
+
+``` r
+library(dbscan)
+```
+
+```
+## Error in library(dbscan): there is no package called 'dbscan'
+```
+
+``` r
+cl <- dbscan(iris[,-5], eps = .5, minPts = 5)
+```
+
+```
+## Error in dbscan(iris[, -5], eps = 0.5, minPts = 5): could not find function "dbscan"
+```
+
+``` r
+plot(iris[,-5], col = cl$cluster)
+```
+
+```
+## Error in pairs.default(data.matrix(x), ...): object 'cl' not found
+```
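+
+The chunk fails only because the `dbscan` package is not installed here. As a
+sketch of how `eps` is usually chosen once the package is available (an
+assumption), inspect the k-nearest-neighbour distance plot for its knee, with
+k set to `minPts - 1`:
+
+``` r
+# Choosing eps for DBSCAN (not evaluated in this document)
+library(dbscan)
+kNNdistplot(iris[, -5], k = 4)   # the knee of this curve suggests eps
+abline(h = 0.5, lty = 2)         # the eps value used above
+```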
+
diff --git a/Desktop/open-source/R/documentation/decision_tree.html b/Desktop/open-source/R/documentation/decision_tree.html
new file mode 100644
index 00000000..85037ff5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/decision_tree.html
@@ -0,0 +1,139 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/decision_tree.md below]
diff --git a/Desktop/open-source/R/documentation/decision_tree.md b/Desktop/open-source/R/documentation/decision_tree.md
new file mode 100644
index 00000000..de23c9ce
--- /dev/null
+++ b/Desktop/open-source/R/documentation/decision_tree.md
@@ -0,0 +1,37 @@
+
+
+``` r
+library(rpart)
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# grow tree
+fit <- rpart(y_train ~ ., data = x,method="class")
+```
+
+```
+## Error in model.frame.default(formula = y_train ~ ., data = x, na.action = function (x) : 'data' must be a data.frame, environment, or list
+```
+
+``` r
+summary(fit)
+```
+
+```
+## Error in summary(fit): object 'fit' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(fit,x_test)
+```
+
+```
+## Error in predict(fit, x_test): object 'fit' not found
+```
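+
+The chunk above references `x_train`, `y_train` and `x_test`, which are never
+defined in this file, so every step errors. A self-contained sketch of the
+same workflow on a built-in dataset (only the recommended `rpart` package is
+assumed):
+
+``` r
+# Classification tree on iris (not evaluated in this document)
+library(rpart)
+fit_iris <- rpart(Species ~ ., data = iris, method = "class")
+printcp(fit_iris)                              # complexity-parameter table
+pred <- predict(fit_iris, iris, type = "class")
+table(pred, iris$Species)                      # training confusion matrix
+```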
+
diff --git a/Desktop/open-source/R/documentation/depth_first_search.html b/Desktop/open-source/R/documentation/depth_first_search.html
new file mode 100644
index 00000000..aed783ef
--- /dev/null
+++ b/Desktop/open-source/R/documentation/depth_first_search.html
@@ -0,0 +1,309 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/depth_first_search.md below]
diff --git a/Desktop/open-source/R/documentation/depth_first_search.md b/Desktop/open-source/R/documentation/depth_first_search.md
new file mode 100644
index 00000000..31ea24c5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/depth_first_search.md
@@ -0,0 +1,247 @@
+
+
+``` r
+# Depth-First Search (DFS) Algorithm
+#
+# DFS is a graph traversal algorithm that explores as far as possible along each branch
+# before backtracking. It uses a stack data structure (implemented via recursion here).
+#
+# Time Complexity: O(V + E) where V is vertices and E is edges
+# Space Complexity: O(V) for the visited array and recursion stack
+#
+# Input: An adjacency list representation of a graph and a starting vertex
+# Output: The order in which vertices are visited during DFS traversal
+
+# Recursive DFS function
+dfs_recursive <- function(graph, vertex, visited, result) {
+ # Mark current vertex as visited
+ visited[vertex] <- TRUE
+ result <- c(result, vertex)
+
+ # Visit all unvisited adjacent vertices
+ if (vertex %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ result <- dfs_recursive(graph, neighbor, visited, result)
+ }
+ }
+ }
+
+ return(result)
+}
+
+# Main DFS function
+depth_first_search <- function(graph, start_vertex) {
+ # Get all vertices in the graph
+ all_vertices <- unique(c(names(graph), unlist(graph)))
+
+ # Initialize visited array
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+
+ # Perform DFS starting from the given vertex
+ result <- dfs_recursive(graph, start_vertex, visited, c())
+
+ return(result)
+}
+
+# Iterative DFS function using explicit stack
+dfs_iterative <- function(graph, start_vertex) {
+ # Get all vertices in the graph
+ all_vertices <- unique(c(names(graph), unlist(graph)))
+
+ # Initialize visited array and stack
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ stack <- c(start_vertex)
+ result <- c()
+
+ while (length(stack) > 0) {
+ # Pop vertex from stack
+ vertex <- stack[length(stack)]
+ stack <- stack[-length(stack)]
+
+ if (!visited[vertex]) {
+ # Mark as visited and add to result
+ visited[vertex] <- TRUE
+ result <- c(result, vertex)
+
+ # Add all unvisited neighbors to stack (in reverse order to maintain left-to-right traversal)
+ if (as.character(vertex) %in% names(graph)) {
+ neighbors <- graph[[as.character(vertex)]]
+ for (neighbor in rev(neighbors)) {
+ if (!visited[neighbor]) {
+ stack <- c(stack, neighbor)
+ }
+ }
+ }
+ }
+ }
+
+ return(result)
+}
+
+# Example usage and testing
+cat("=== Depth-First Search (DFS) Algorithm ===\n")
+```
+
+```
+## === Depth-First Search (DFS) Algorithm ===
+```
+
+``` r
+# Create a sample graph as adjacency list
+# Graph structure:
+# 1
+# / \
+# 2 3
+# / \ \
+# 4 5 6
+graph <- list(
+ "1" = c(2, 3),
+ "2" = c(4, 5),
+ "3" = c(6),
+ "4" = c(),
+ "5" = c(),
+ "6" = c()
+)
+
+cat("Graph structure (adjacency list):\n")
+```
+
+```
+## Graph structure (adjacency list):
+```
+
+``` r
+for (vertex in names(graph)) {
+ cat("Vertex", vertex, "-> [", paste(graph[[vertex]], collapse = ", "), "]\n")
+}
+```
+
+```
+## Vertex 1 -> [ 2, 3 ]
+## Vertex 2 -> [ 4, 5 ]
+## Vertex 3 -> [ 6 ]
+## Vertex 4 -> [ ]
+## Vertex 5 -> [ ]
+## Vertex 6 -> [ ]
+```
+
+``` r
+# Test recursive DFS
+cat("\nRecursive DFS starting from vertex 1:\n")
+```
+
+```
+##
+## Recursive DFS starting from vertex 1:
+```
+
+``` r
+result_recursive <- depth_first_search(graph, 1)
+cat("Traversal order:", paste(result_recursive, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 1 -> 2 -> 4 -> 5 -> 3 -> 6
+```
+
+``` r
+# Test iterative DFS
+cat("\nIterative DFS starting from vertex 1:\n")
+```
+
+```
+##
+## Iterative DFS starting from vertex 1:
+```
+
+``` r
+result_iterative <- dfs_iterative(graph, 1)
+cat("Traversal order:", paste(result_iterative, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 1 -> 2 -> 4 -> 5 -> 3 -> 6
+```
+
+``` r
+# Test with different starting vertex
+cat("\nRecursive DFS starting from vertex 2:\n")
+```
+
+```
+##
+## Recursive DFS starting from vertex 2:
+```
+
+``` r
+result_from_2 <- depth_first_search(graph, 2)
+cat("Traversal order:", paste(result_from_2, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 2 -> 4 -> 5
+```
+
+``` r
+# Example with a more complex graph (with cycles)
+cat("\n=== Example with Cyclic Graph ===\n")
+```
+
+```
+##
+## === Example with Cyclic Graph ===
+```
+
+``` r
+cyclic_graph <- list(
+ "1" = c(2, 3),
+ "2" = c(1, 4),
+ "3" = c(1, 5),
+ "4" = c(2, 6),
+ "5" = c(3, 6),
+ "6" = c(4, 5)
+)
+
+cat("Cyclic graph structure:\n")
+```
+
+```
+## Cyclic graph structure:
+```
+
+``` r
+for (vertex in names(cyclic_graph)) {
+ cat("Vertex", vertex, "-> [", paste(cyclic_graph[[vertex]], collapse = ", "), "]\n")
+}
+```
+
+```
+## Vertex 1 -> [ 2, 3 ]
+## Vertex 2 -> [ 1, 4 ]
+## Vertex 3 -> [ 1, 5 ]
+## Vertex 4 -> [ 2, 6 ]
+## Vertex 5 -> [ 3, 6 ]
+## Vertex 6 -> [ 4, 5 ]
+```
+
+``` r
+cat("\nDFS on cyclic graph starting from vertex 1:\n")
+```
+
+```
+##
+## DFS on cyclic graph starting from vertex 1:
+```
+
+``` r
+cyclic_result <- depth_first_search(cyclic_graph, 1)
+cat("Traversal order:", paste(cyclic_result, collapse = " -> "), "\n")
+```
+
+```
+## Traversal order: 1 -> 2 -> 4 -> 6 -> 5 -> 3 -> 3 -> 5 -> 6 -> 4 -> 2
+```
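+
+Note the repeated vertices in the cyclic traversal above: R copies function
+arguments, so the `visited` vector updated inside a recursive call is not seen
+by the caller, and each branch re-explores vertices the other branch already
+marked. One way to share the visited set across calls is an environment; the
+following is a sketch added for illustration, not part of the original
+implementation:
+
+``` r
+# DFS with a shared visited set (not evaluated in this document)
+dfs_env <- function(graph, start_vertex) {
+  state <- new.env()
+  state$visited <- character(0)
+  state$result <- c()
+  visit <- function(v) {
+    key <- as.character(v)
+    if (key %in% state$visited) return(invisible(NULL))
+    state$visited <- c(state$visited, key)   # environments update in place
+    state$result <- c(state$result, v)
+    for (n in graph[[key]]) visit(n)
+  }
+  visit(start_vertex)
+  state$result
+}
+# dfs_env(cyclic_graph, 1) visits each vertex exactly once: 1 2 4 6 5 3
+```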
+
diff --git a/Desktop/open-source/R/documentation/dijkstra_shortest_path.html b/Desktop/open-source/R/documentation/dijkstra_shortest_path.html
new file mode 100644
index 00000000..134a3d81
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dijkstra_shortest_path.html
@@ -0,0 +1,440 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/dijkstra_shortest_path.md below]
diff --git a/Desktop/open-source/R/documentation/dijkstra_shortest_path.md b/Desktop/open-source/R/documentation/dijkstra_shortest_path.md
new file mode 100644
index 00000000..41868d84
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dijkstra_shortest_path.md
@@ -0,0 +1,382 @@
+
+
+``` r
+# Dijkstra's Shortest Path Algorithm
+#
+# Dijkstra's algorithm finds the shortest path between a source vertex and all other vertices
+# in a weighted graph with non-negative edge weights. It uses a greedy approach with a priority queue.
+#
+# Time Complexity: O((V + E) log V) with binary heap, O(V^2) with simple array
+# Space Complexity: O(V) for distance and visited arrays
+#
+# Input: A weighted graph represented as adjacency list with weights, and a source vertex
+# Output: Shortest distances from source to all vertices and the paths
+
+# Priority queue implementation using simple vector (for educational purposes)
+# In production, use more efficient data structures
+create_priority_queue <- function() {
+ list(
+ elements = data.frame(vertex = integer(0), distance = numeric(0)),
+ size = 0
+ )
+}
+
+# Insert element into priority queue
+pq_insert <- function(pq, vertex, distance) {
+ pq$elements <- rbind(pq$elements, data.frame(vertex = vertex, distance = distance))
+ pq$size <- pq$size + 1
+ return(pq)
+}
+
+# Extract minimum element from priority queue
+pq_extract_min <- function(pq) {
+ if (pq$size == 0) {
+ return(list(pq = pq, min_element = NULL))
+ }
+
+ min_idx <- which.min(pq$elements$distance)
+ min_element <- pq$elements[min_idx, ]
+ pq$elements <- pq$elements[-min_idx, ]
+ pq$size <- pq$size - 1
+
+ return(list(pq = pq, min_element = min_element))
+}
+
+# Check if priority queue is empty
+pq_is_empty <- function(pq) {
+ return(pq$size == 0)
+}
+
+# Main Dijkstra's algorithm implementation
+dijkstra_shortest_path <- function(graph, source) {
+ # Get all vertices in the graph
+ all_vertices <- unique(c(names(graph), unlist(lapply(graph, function(x) x$vertex))))
+ num_vertices <- max(all_vertices)
+
+ # Initialize distances and previous vertices
+ distances <- rep(Inf, num_vertices)
+ previous <- rep(-1, num_vertices)
+ visited <- rep(FALSE, num_vertices)
+
+ # Set source distance to 0
+ distances[source] <- 0
+
+ # Create priority queue and add source
+ pq <- create_priority_queue()
+ pq <- pq_insert(pq, source, 0)
+
+ while (!pq_is_empty(pq)) {
+ # Extract vertex with minimum distance
+ result <- pq_extract_min(pq)
+ pq <- result$pq
+ current <- result$min_element
+
+ if (is.null(current)) break
+
+ u <- current$vertex
+
+ # Skip if already visited
+ if (visited[u]) next
+
+ # Mark as visited
+ visited[u] <- TRUE
+
+ # Update distances to neighbors
+ if (as.character(u) %in% names(graph)) {
+ for (edge in graph[[as.character(u)]]) {
+ v <- edge$vertex
+ weight <- edge$weight
+
+ # Relaxation step
+ if (!visited[v] && distances[u] + weight < distances[v]) {
+ distances[v] <- distances[u] + weight
+ previous[v] <- u
+ pq <- pq_insert(pq, v, distances[v])
+ }
+ }
+ }
+ }
+
+ return(list(
+ distances = distances,
+ previous = previous
+ ))
+}
+
+# Reconstruct shortest path from source to target
+get_shortest_path <- function(dijkstra_result, source, target) {
+ previous <- dijkstra_result$previous
+ distances <- dijkstra_result$distances
+
+ # Check if target is reachable
+ if (distances[target] == Inf) {
+ return(list(
+ path = NULL,
+ distance = Inf
+ ))
+ }
+
+ # Reconstruct path by backtracking
+ path <- c()
+ current <- target
+
+ while (current != -1) {
+ path <- c(current, path)
+ current <- previous[current]
+ }
+
+ return(list(
+ path = path,
+ distance = distances[target]
+ ))
+}
+
+# Find shortest paths to all vertices
+get_all_shortest_paths <- function(dijkstra_result, source) {
+ distances <- dijkstra_result$distances
+ previous <- dijkstra_result$previous
+ paths <- list()
+
+ for (target in 1:length(distances)) {
+ if (distances[target] != Inf) {
+ path_result <- get_shortest_path(dijkstra_result, source, target)
+ paths[[as.character(target)]] <- path_result
+ }
+ }
+
+ return(paths)
+}
+
+# Example usage and testing
+cat("=== Dijkstra's Shortest Path Algorithm ===\n")
+```
+
+```
+## === Dijkstra's Shortest Path Algorithm ===
+```
+
+``` r
+# Create a weighted graph as adjacency list
+# Graph structure with weights:
+# 1
+# / \
+# 4/ \2
+# / \
+# 2 3
+# |3 /1
+# | /
+# 4-----5
+# 2
+weighted_graph <- list(
+ "1" = list(
+ list(vertex = 2, weight = 4),
+ list(vertex = 3, weight = 2)
+ ),
+ "2" = list(
+ list(vertex = 4, weight = 3)
+ ),
+ "3" = list(
+ list(vertex = 5, weight = 1)
+ ),
+ "4" = list(
+ list(vertex = 5, weight = 2)
+ ),
+ "5" = list()
+)
+
+cat("Weighted graph structure:\n")
+```
+
+```
+## Weighted graph structure:
+```
+
+``` r
+for (vertex in names(weighted_graph)) {
+ edges <- weighted_graph[[vertex]]
+ if (length(edges) > 0) {
+ edge_strs <- sapply(edges, function(e) paste0(e$vertex, "(", e$weight, ")"))
+ cat("Vertex", vertex, "-> [", paste(edge_strs, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", vertex, "-> []\n")
+ }
+}
+```
+
+```
+## Vertex 1 -> [ 2(4), 3(2) ]
+## Vertex 2 -> [ 4(3) ]
+## Vertex 3 -> [ 5(1) ]
+## Vertex 4 -> [ 5(2) ]
+## Vertex 5 -> []
+```
+
+``` r
+# Run Dijkstra's algorithm from vertex 1
+cat("\nRunning Dijkstra's algorithm from vertex 1:\n")
+```
+
+```
+##
+## Running Dijkstra's algorithm from vertex 1:
+```
+
+``` r
+result <- dijkstra_shortest_path(weighted_graph, 1)
+
+# Display shortest distances
+cat("Shortest distances from vertex 1:\n")
+```
+
+```
+## Shortest distances from vertex 1:
+```
+
+``` r
+for (i in 1:length(result$distances)) {
+ if (result$distances[i] != Inf) {
+ cat("To vertex", i, ": distance =", result$distances[i], "\n")
+ }
+}
+```
+
+```
+## To vertex 1 : distance = 0
+## To vertex 2 : distance = 4
+## To vertex 3 : distance = 2
+## To vertex 4 : distance = 7
+## To vertex 5 : distance = 3
+```
+
+``` r
+# Get shortest path to specific vertex
+cat("\nShortest path from 1 to 5:\n")
+```
+
+```
+##
+## Shortest path from 1 to 5:
+```
+
+``` r
+path_to_5 <- get_shortest_path(result, 1, 5)
+if (!is.null(path_to_5$path)) {
+ cat("Path:", paste(path_to_5$path, collapse = " -> "), "\n")
+ cat("Distance:", path_to_5$distance, "\n")
+}
+```
+
+```
+## Path: 1 -> 3 -> 5
+## Distance: 3
+```
+
+``` r
+# Get all shortest paths
+cat("\nAll shortest paths from vertex 1:\n")
+```
+
+```
+##
+## All shortest paths from vertex 1:
+```
+
+``` r
+all_paths <- get_all_shortest_paths(result, 1)
+for (target in names(all_paths)) {
+ path_info <- all_paths[[target]]
+ cat("To vertex", target, ": ", paste(path_info$path, collapse = " -> "),
+ " (distance:", path_info$distance, ")\n")
+}
+```
+
+```
+## To vertex 1 : 1 (distance: 0 )
+## To vertex 2 : 1 -> 2 (distance: 4 )
+## To vertex 3 : 1 -> 3 (distance: 2 )
+## To vertex 4 : 1 -> 2 -> 4 (distance: 7 )
+## To vertex 5 : 1 -> 3 -> 5 (distance: 3 )
+```
+
+``` r
+# Example with a more complex graph
+cat("\n=== More Complex Weighted Graph Example ===\n")
+```
+
+```
+##
+## === More Complex Weighted Graph Example ===
+```
+
+``` r
+complex_weighted_graph <- list(
+ "1" = list(
+ list(vertex = 2, weight = 7),
+ list(vertex = 3, weight = 9),
+ list(vertex = 6, weight = 14)
+ ),
+ "2" = list(
+ list(vertex = 3, weight = 10),
+ list(vertex = 4, weight = 15)
+ ),
+ "3" = list(
+ list(vertex = 4, weight = 11),
+ list(vertex = 6, weight = 2)
+ ),
+ "4" = list(
+ list(vertex = 5, weight = 6)
+ ),
+ "5" = list(),
+ "6" = list(
+ list(vertex = 5, weight = 9)
+ )
+)
+
+cat("Complex weighted graph from vertex 1:\n")
+```
+
+```
+## Complex weighted graph from vertex 1:
+```
+
+``` r
+complex_result <- dijkstra_shortest_path(complex_weighted_graph, 1)
+
+cat("Shortest distances:\n")
+```
+
+```
+## Shortest distances:
+```
+
+``` r
+for (i in 1:length(complex_result$distances)) {
+ if (complex_result$distances[i] != Inf) {
+ cat("To vertex", i, ": distance =", complex_result$distances[i], "\n")
+ }
+}
+```
+
+```
+## To vertex 1 : distance = 0
+## To vertex 2 : distance = 7
+## To vertex 3 : distance = 9
+## To vertex 4 : distance = 20
+## To vertex 5 : distance = 20
+## To vertex 6 : distance = 11
+```
+
+``` r
+# Shortest path to vertex 5
+path_to_5_complex <- get_shortest_path(complex_result, 1, 5)
+if (!is.null(path_to_5_complex$path)) {
+ cat("Shortest path from 1 to 5:", paste(path_to_5_complex$path, collapse = " -> "), "\n")
+ cat("Distance:", path_to_5_complex$distance, "\n")
+}
+```
+
+```
+## Shortest path from 1 to 5: 1 -> 3 -> 6 -> 5
+## Distance: 20
+```
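+
+As the header comment states, the implementation assumes non-negative edge
+weights: a vertex is finalised the moment it is popped, so a negative edge
+discovered later can never correct its distance. A small hypothetical graph
+(added for illustration, not part of the original file) makes this concrete:
+
+``` r
+# Why negative weights break this algorithm (not evaluated in this document)
+negative_graph <- list(
+  "1" = list(list(vertex = 2, weight = 10), list(vertex = 3, weight = 5)),
+  "2" = list(list(vertex = 3, weight = -8)),
+  "3" = list()
+)
+# dijkstra_shortest_path(negative_graph, 1) would finalise vertex 3 at
+# distance 5, although the cheaper route 1 -> 2 -> 3 costs 10 + (-8) = 2.
+# For graphs with negative weights, use Bellman-Ford instead.
+```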
+
diff --git a/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.html b/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.html
new file mode 100644
index 00000000..9105bd39
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.html
@@ -0,0 +1,133 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/dimensionality_reduction_algorithms.md below]
diff --git a/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.md b/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.md
new file mode 100644
index 00000000..ae21219f
--- /dev/null
+++ b/Desktop/open-source/R/documentation/dimensionality_reduction_algorithms.md
@@ -0,0 +1,27 @@
+
+
+``` r
+library(stats)
+pca <- princomp(train, cor = TRUE)
+```
+
+```
+## Error in princomp(train, cor = TRUE): object 'train' not found
+```
+
+``` r
+train_reduced <- predict(pca,train)
+```
+
+```
+## Error in predict(pca, train): object 'pca' not found
+```
+
+``` r
+test_reduced <- predict(pca,test)
+```
+
+```
+## Error in predict(pca, test): object 'pca' not found
+```
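+
+The errors occur because no `train` or `test` objects are defined in this
+file. A self-contained sketch of the same PCA workflow on a built-in dataset
+(base `stats` only):
+
+``` r
+# PCA on the numeric iris columns (not evaluated in this document)
+pca_iris <- princomp(iris[, 1:4], cor = TRUE)
+summary(pca_iris)                      # variance explained per component
+scores <- predict(pca_iris, iris[, 1:4])
+head(scores[, 1:2])                    # first two principal-component scores
+```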
+
diff --git a/Desktop/open-source/R/documentation/figure/unnamed-chunk-1-1.png b/Desktop/open-source/R/documentation/figure/unnamed-chunk-1-1.png
new file mode 100644
index 00000000..f6226f50
Binary files /dev/null and b/Desktop/open-source/R/documentation/figure/unnamed-chunk-1-1.png differ
diff --git a/Desktop/open-source/R/documentation/gmm.html b/Desktop/open-source/R/documentation/gmm.html
new file mode 100644
index 00000000..188c9cd9
--- /dev/null
+++ b/Desktop/open-source/R/documentation/gmm.html
@@ -0,0 +1,136 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/gmm.md below]
diff --git a/Desktop/open-source/R/documentation/gmm.md b/Desktop/open-source/R/documentation/gmm.md
new file mode 100644
index 00000000..bde46be3
--- /dev/null
+++ b/Desktop/open-source/R/documentation/gmm.md
@@ -0,0 +1,34 @@
+
+
+``` r
+library(mclust) # Gaussian mixture model (GMM)
+```
+
+```
+## Error in library(mclust): there is no package called 'mclust'
+```
+
+``` r
+gmm_fit <- Mclust(iris[, 1:4]) # Fit a GMM model
+```
+
+```
+## Error in Mclust(iris[, 1:4]): could not find function "Mclust"
+```
+
+``` r
+summary(gmm_fit) # Summary table
+```
+
+```
+## Error in summary(gmm_fit): object 'gmm_fit' not found
+```
+
+``` r
+plot(gmm_fit, 'BIC') # Select model based on BIC
+```
+
+```
+## Error in plot(gmm_fit, "BIC"): object 'gmm_fit' not found
+```
+
diff --git a/Desktop/open-source/R/documentation/gradient_boosting_algorithms.html b/Desktop/open-source/R/documentation/gradient_boosting_algorithms.html
new file mode 100644
index 00000000..2847253c
--- /dev/null
+++ b/Desktop/open-source/R/documentation/gradient_boosting_algorithms.html
@@ -0,0 +1,197 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/gradient_boosting_algorithms.md below]
diff --git a/Desktop/open-source/R/documentation/gradient_boosting_algorithms.md b/Desktop/open-source/R/documentation/gradient_boosting_algorithms.md
new file mode 100644
index 00000000..577b4b6d
--- /dev/null
+++ b/Desktop/open-source/R/documentation/gradient_boosting_algorithms.md
@@ -0,0 +1,83 @@
+
+
+``` r
+# GBM
+library(caret)
+x <- cbind(x_train,y_train)
+# Fitting model
+fitControl <- trainControl( method = "repeatedcv", number = 4, repeats = 4)
+fit <- train(y ~ ., data = x, method = "gbm", trControl = fitControl,verbose = FALSE)
+predicted= predict(fit,x_test,type= "prob")[,2]
+
+
+
+# XGBoost
+require(caret)
+x <- cbind(x_train,y_train)
+# Fitting model
+TrainControl <- trainControl( method = "repeatedcv", number = 10, repeats = 4)
+model<- train(y ~ ., data = x, method = "xgbLinear", trControl = TrainControl,verbose = FALSE)
+# OR
+model<- train(y ~ ., data = x, method = "xgbTree", trControl = TrainControl,verbose = FALSE)
+predicted <- predict(model, x_test)
+
+
+
+# LightGBM
+library(RLightGBM)
+data(example.binary)
+# Parameters
+num_iterations <- 100
+config <- list(objective = "binary", metric="binary_logloss,auc", learning_rate = 0.1, num_leaves = 63, tree_learner = "serial", feature_fraction = 0.8, bagging_freq = 5, bagging_fraction = 0.8, min_data_in_leaf = 50, min_sum_hessian_in_leaf = 5.0)
+# Create data handle and booster
+handle.data <- lgbm.data.create(x)
+lgbm.data.setField(handle.data, "label", y)
+handle.booster <- lgbm.booster.create(handle.data, lapply(config, as.character))
+# Train for num_iterations iterations and eval every 5 steps
+lgbm.booster.train(handle.booster, num_iterations, 5)
+# Predict
+pred <- lgbm.booster.predict(handle.booster, x.test)
+# Test accuracy
+sum(y.test == (y.pred > 0.5)) / length(y.test)
+# Save model (can be loaded again via lgbm.booster.load(filename))
+lgbm.booster.save(handle.booster, filename = "/tmp/model.txt")
+
+
+
+# Catboost
+set.seed(1)
+
+require(titanic)
+
+require(caret)
+
+require(catboost)
+
+tt <- titanic::titanic_train[complete.cases(titanic::titanic_train),]
+
+data <- as.data.frame(as.matrix(tt), stringsAsFactors = TRUE)
+
+drop_columns = c("PassengerId", "Survived", "Name", "Ticket", "Cabin")
+
+x <- data[,!(names(data) %in% drop_columns)]
+y <- data[,c("Survived")]
+
+fit_control <- trainControl(method = "cv", number = 4,classProbs = TRUE)
+
+grid <- expand.grid(depth = c(4, 6, 8),learning_rate = 0.1,iterations = 100, l2_leaf_reg = 1e-3, rsm = 0.95, border_count = 64)
+
+report <- train(x, as.factor(make.names(y)),method = catboost.caret,verbose = TRUE, preProc = NULL,tuneGrid = grid, trControl = fit_control)
+
+print(report)
+
+importance <- varImp(report, scale = FALSE)
+
+print(importance)
+```
+
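+All of the snippets above rely on caret wrappers and on `x_train`/`y_train`
+objects that this file never defines. A small self-contained sketch using the
+`xgboost` package directly (assumed installed; not part of the original
+snippets):
+
+``` r
+# Gradient boosting with xgboost on a built-in dataset (not evaluated here)
+library(xgboost)
+X <- as.matrix(mtcars[, -9])                  # predictors
+y <- mtcars$am                                # binary label (0/1)
+dtrain <- xgb.DMatrix(X, label = y)
+bst <- xgb.train(params = list(objective = "binary:logistic"),
+                 data = dtrain, nrounds = 20)
+pred <- predict(bst, dtrain)                  # predicted probabilities
+mean((pred > 0.5) == y)                       # training accuracy
+```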
+
diff --git a/Desktop/open-source/R/documentation/heirarchical_clustering.html b/Desktop/open-source/R/documentation/heirarchical_clustering.html
new file mode 100644
index 00000000..053841a4
--- /dev/null
+++ b/Desktop/open-source/R/documentation/heirarchical_clustering.html
@@ -0,0 +1,125 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/heirarchical_clustering.md below]
diff --git a/Desktop/open-source/R/documentation/heirarchical_clustering.md b/Desktop/open-source/R/documentation/heirarchical_clustering.md
new file mode 100644
index 00000000..f57831aa
--- /dev/null
+++ b/Desktop/open-source/R/documentation/heirarchical_clustering.md
@@ -0,0 +1,10 @@
+
+
+``` r
+set.seed(42)
+clusters <- hclust(dist(iris[, -5]))
+plot(clusters)
+```
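+
+Continuing the example, the dendrogram can be cut into a fixed number of
+groups and compared with the known species labels (not evaluated in this
+document):
+
+``` r
+groups <- cutree(clusters, k = 3)   # cut the tree into 3 clusters
+table(groups, iris$Species)         # cross-tabulate with the true species
+```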
+
+
+
diff --git a/Desktop/open-source/R/documentation/k_folds.html b/Desktop/open-source/R/documentation/k_folds.html
new file mode 100644
index 00000000..6169b9b2
--- /dev/null
+++ b/Desktop/open-source/R/documentation/k_folds.html
@@ -0,0 +1,150 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/k_folds.md below]
diff --git a/Desktop/open-source/R/documentation/k_folds.md b/Desktop/open-source/R/documentation/k_folds.md
new file mode 100644
index 00000000..f57786d9
--- /dev/null
+++ b/Desktop/open-source/R/documentation/k_folds.md
@@ -0,0 +1,34 @@
+
+
+``` r
+# K folds cross validation is essential for machine learning
+# createFolds function in package caret is easy to use
+# here we write our own function
+
+get_k_folds<-function(y = c(),k = 10, isList = TRUE, seed = 123){
+ set.seed(seed)
+ folds<-sample(1:length(y), length(y))
+ every_n<-ceiling(length(y)/k)
+ matFolds<-suppressWarnings(matrix(folds, ncol=every_n, byrow = T))
+
+ if(isList){
+ value<-NULL
+ rownames(matFolds)<-paste("Folds",1:k,sep="")
+ value<-lapply(1:k, function(x){
+ if(x == k){
+ return(matFolds[x,][1:(length(y)-every_n*(k-1))])
+ }else{
+ return(matFolds[x,])
+ }
+ })
+ }else{
+ value<-c()
+ for(i in 1:length(y)){
+ value[i]<-ceiling(i/every_n)
+ }
+ }
+
+ return(value)
+}
+```
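+
+A short usage sketch for the function above (base R only; not evaluated in
+this document):
+
+``` r
+folds <- get_k_folds(iris$Species, k = 5)   # 5-fold split of 150 rows
+str(folds)                         # list of 5 vectors of row indices
+test_idx  <- folds[[1]]            # hold out fold 1
+train_idx <- unlist(folds[-1])     # train on the remaining folds
+```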
+
diff --git a/Desktop/open-source/R/documentation/k_means.html b/Desktop/open-source/R/documentation/k_means.html
new file mode 100644
index 00000000..ff4b4477
--- /dev/null
+++ b/Desktop/open-source/R/documentation/k_means.html
@@ -0,0 +1,126 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/k_means.md below]
diff --git a/Desktop/open-source/R/documentation/k_means.md b/Desktop/open-source/R/documentation/k_means.md
new file mode 100644
index 00000000..81d99ce2
--- /dev/null
+++ b/Desktop/open-source/R/documentation/k_means.md
@@ -0,0 +1,12 @@
+
+
+``` r
+library(cluster)
+set.seed(42)
+fit <- kmeans(X, 3) # 3 cluster solution
+```
+
+```
+## Error in as.matrix(x): object 'X' not found
+```
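+
+The error occurs because `X` is never created in this file. A concrete sketch
+with the numeric iris columns (the `cluster` package loaded above provides
+`clusplot`):
+
+``` r
+# k-means on a concrete matrix (not evaluated in this document)
+X <- as.matrix(iris[, 1:4])
+fit <- kmeans(X, 3)            # 3-cluster solution
+fit$size                       # points per cluster
+clusplot(X, fit$cluster)       # 2-D projection of the clustering
+```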
+
diff --git a/Desktop/open-source/R/documentation/kmeans_clustering.html b/Desktop/open-source/R/documentation/kmeans_clustering.html
new file mode 100644
index 00000000..c0641210
--- /dev/null
+++ b/Desktop/open-source/R/documentation/kmeans_clustering.html
@@ -0,0 +1,126 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/kmeans_clustering.md below]
diff --git a/Desktop/open-source/R/documentation/kmeans_clustering.md b/Desktop/open-source/R/documentation/kmeans_clustering.md
new file mode 100644
index 00000000..ef47eb01
--- /dev/null
+++ b/Desktop/open-source/R/documentation/kmeans_clustering.md
@@ -0,0 +1,11 @@
+
+
+``` r
+set.seed(42)
+cl <- kmeans(iris[,-5], 3)
+plot(iris[,-5], col = cl$cluster)
+points(cl$centers, col = 1:3, pch = 8)
+```
+
+
+
diff --git a/Desktop/open-source/R/documentation/kmeans_raw_r.html b/Desktop/open-source/R/documentation/kmeans_raw_r.html
new file mode 100644
index 00000000..a7a4bc14
--- /dev/null
+++ b/Desktop/open-source/R/documentation/kmeans_raw_r.html
@@ -0,0 +1,174 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/kmeans_raw_r.md below]
diff --git a/Desktop/open-source/R/documentation/kmeans_raw_r.md b/Desktop/open-source/R/documentation/kmeans_raw_r.md
new file mode 100644
index 00000000..19db849b
--- /dev/null
+++ b/Desktop/open-source/R/documentation/kmeans_raw_r.md
@@ -0,0 +1,58 @@
+
+
+``` r
+custonKmeans<-function(dataset=NA,k=NA){
+ if(is.na(dataset) || is.na(k)){
+ stop("You must input valid parameters!")
+ }
+ Eudist<-function(x,y){
+ distance<-sqrt(sum((x-y)^2))
+ return (distance)
+ }
+
+ rows.dataset<-nrow(dataset)
+ continue.change=TRUE
+ initPoint<-dataset[sample.int(rows.dataset,size = k),]
+ formerPoint<-initPoint
+ iterPoint<-matrix(0,nrow = k,ncol = ncol(dataset))
+
+  # matrix of distances from each point to each cluster centre
+ error.matrix<-matrix(0,nrow=rows.dataset,ncol=k)
+ while(continue.change){
+    # indicator matrix recording which cluster each point belongs to
+ cluster.matrix<-matrix(0,nrow=rows.dataset,ncol=k)
+    for(i in 1:rows.dataset){# distance from each point to each of the k centres
+ for(j in 1:k){
+ error.matrix[i,j]<-Eudist(dataset[i,],formerPoint[j,])
+ }
+ }
+    # assign each point to its nearest centre
+ for(i in 1:rows.dataset){
+ cluster.matrix[i,which.min(error.matrix[i,])]<-1
+ }
+
+    # update the centroid positions
+ for(i in 1:k){
+ iterPoint[i,]<-apply(dataset[which(cluster.matrix[,i] == 1),],2,"mean")
+ }
+ all.true<-c()
+ for(i in 1:k){
+ if(all(formerPoint[i,] == iterPoint[i,]) == T){
+ all.true[i]<-TRUE
+ }
+ }
+ formerPoint = iterPoint
+ continue.change=ifelse(all(all.true) == T,F,T)
+ }
+ colnames(iterPoint)<-colnames(dataset)
+ out=list()
+ out[["centers"]]<-iterPoint
+ out[["distance"]]<-error.matrix
+ out[["cluster"]]<-rep(1,rows.dataset)
+ for(i in 1:rows.dataset){
+ out[["cluster"]][i]<-which(cluster.matrix[i,] == 1)
+ }
+ return(out)
+}
+```
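+
+A usage sketch for the function above on two well-separated synthetic clusters
+(added for illustration; output not recorded here):
+
+``` r
+set.seed(42)
+pts <- rbind(matrix(rnorm(100, mean = 0), ncol = 2),
+             matrix(rnorm(100, mean = 5), ncol = 2))
+res <- custonKmeans(pts, k = 2)
+res$centers          # two centroids, roughly (0, 0) and (5, 5)
+table(res$cluster)   # cluster sizes, roughly 50 and 50
+```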
+
diff --git a/Desktop/open-source/R/documentation/knn.html b/Desktop/open-source/R/documentation/knn.html
new file mode 100644
index 00000000..bc8725be
--- /dev/null
+++ b/Desktop/open-source/R/documentation/knn.html
@@ -0,0 +1,142 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/knn.md below]
diff --git a/Desktop/open-source/R/documentation/knn.md b/Desktop/open-source/R/documentation/knn.md
new file mode 100644
index 00000000..b9954008
--- /dev/null
+++ b/Desktop/open-source/R/documentation/knn.md
@@ -0,0 +1,44 @@
+
+
+``` r
+library(knn)
+```
+
+```
+## Error in library(knn): there is no package called 'knn'
+```
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Fitting model
+fit <-knn(y_train ~ ., data = x,k=5)
+```
+
+```
+## Error in knn(y_train ~ ., data = x, k = 5): could not find function "knn"
+```
+
+``` r
+summary(fit)
+```
+
+```
+## Error in summary(fit): object 'fit' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(fit,x_test)
+```
+
+```
+## Error in predict(fit, x_test): object 'fit' not found
+```
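+
+There is no CRAN package called `knn`; k-nearest-neighbour classification in R
+is usually done with `class::knn()`, which takes training and test matrices
+plus a factor of labels rather than a formula. A self-contained sketch (the
+`class` package ships with standard R installations):
+
+``` r
+# k-NN on iris with class::knn (not evaluated in this document)
+library(class)
+set.seed(1)
+idx <- sample(nrow(iris), 100)
+train_x <- iris[idx, 1:4];  test_x <- iris[-idx, 1:4]
+train_y <- iris$Species[idx]
+pred <- knn(train = train_x, test = test_x, cl = train_y, k = 5)
+table(pred, iris$Species[-idx])   # confusion matrix on the held-out rows
+```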
+
diff --git a/Desktop/open-source/R/documentation/lasso.html b/Desktop/open-source/R/documentation/lasso.html
new file mode 100644
index 00000000..60cd2d50
--- /dev/null
+++ b/Desktop/open-source/R/documentation/lasso.html
@@ -0,0 +1,164 @@
+[rendered HTML output omitted: markup lost in extraction; the content duplicates documentation/lasso.md below]
diff --git a/Desktop/open-source/R/documentation/lasso.md b/Desktop/open-source/R/documentation/lasso.md
new file mode 100644
index 00000000..ce892790
--- /dev/null
+++ b/Desktop/open-source/R/documentation/lasso.md
@@ -0,0 +1,82 @@
+
+
+``` r
+data(ggplot2::diamonds)
+```
+
+```
+## Warning in data(ggplot2::diamonds): data set 'ggplot2::diamonds' not found
+```
+
+``` r
+library(caret)
+```
+
+```
+## Error in library(caret): there is no package called 'caret'
+```
+
+``` r
+library(dplyr)
+```
+
+```
+## Error in library(dplyr): there is no package called 'dplyr'
+```
+
+``` r
+dia.trans<-bind_cols(diamonds %>% select_if(is.numeric),
+ model.matrix(~cut-1,diamonds) %>% as_tibble(),
+ model.matrix(~color-1,diamonds) %>% as_tibble(),
+ model.matrix(~clarity-1,diamonds) %>% as_tibble())
+```
+
+```
+## Error in bind_cols(diamonds %>% select_if(is.numeric), model.matrix(~cut - : could not find function "bind_cols"
+```
+
+``` r
+#setting parameters alpha and lambda
+lasso_expand<-expand.grid(alpha = 1, lambda = seq(0.001,0.1,by = 0.0005))
+lasso_mod <- train(x=dia.trans %>% select(-price), y=dia.trans$price, method='glmnet',
+ tuneGrid=lasso_expand)
+```
+
+```
+## Error in train(x = dia.trans %>% select(-price), y = dia.trans$price, : could not find function "train"
+```
+
+``` r
+#best tune
+lasso_mod$bestTune
+```
+
+```
+## Error: object 'lasso_mod' not found
+```
+
+``` r
+lasso_mod$results$RMSE
+```
+
+```
+## Error: object 'lasso_mod' not found
+```
+
+``` r
+lasso_imp<-varImp(lasso_mod)
+```
+
+```
+## Error in varImp(lasso_mod): could not find function "varImp"
+```
+
+``` r
+# Get the importance of each feature; low-importance features can then be eliminated
+lasso_imp$importance
+```
+
+```
+## Error: object 'lasso_imp' not found
+```
+
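+The example above fails because `data(ggplot2::diamonds)` is not valid `data()` syntax and the caret/dplyr packages are not available in the rendering environment. A smaller runnable sketch, assuming only that the `glmnet` package is installed, fits the lasso on the built-in `mtcars` data:
+
+``` r
+# Lasso sketch: assumes the `glmnet` package is installed; uses built-in mtcars
+library(glmnet)
+
+x <- as.matrix(mtcars[, setdiff(names(mtcars), "mpg")])
+y <- mtcars$mpg
+
+# Cross-validate over a lambda path; alpha = 1 selects the lasso (L1) penalty
+set.seed(1)
+cv_fit <- cv.glmnet(x, y, alpha = 1)
+
+cv_fit$lambda.min               # lambda with the lowest cross-validated error
+coef(cv_fit, s = "lambda.min")  # coefficients; features shrunk to exactly zero are dropped
+```
+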
diff --git a/Desktop/open-source/R/documentation/light_gbm.html b/Desktop/open-source/R/documentation/light_gbm.html
new file mode 100644
index 00000000..cb9105a1
--- /dev/null
+++ b/Desktop/open-source/R/documentation/light_gbm.html
@@ -0,0 +1,167 @@
+<!-- Rendered HTML version of documentation/light_gbm.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/light_gbm.md b/Desktop/open-source/R/documentation/light_gbm.md
new file mode 100644
index 00000000..7e77313c
--- /dev/null
+++ b/Desktop/open-source/R/documentation/light_gbm.md
@@ -0,0 +1,85 @@
+
+
+``` r
+library(RLightGBM)
+```
+
+```
+## Error in library(RLightGBM): there is no package called 'RLightGBM'
+```
+
+``` r
+data(example.binary)
+```
+
+```
+## Warning in data(example.binary): data set 'example.binary' not found
+```
+
+``` r
+#Parameters
+
+num_iterations <- 100
+config <- list(objective = "binary", metric = "binary_logloss,auc",
+               learning_rate = 0.1, num_leaves = 63, tree_learner = "serial",
+               feature_fraction = 0.8, bagging_freq = 5, bagging_fraction = 0.8,
+               min_data_in_leaf = 50, min_sum_hessian_in_leaf = 5.0)
+
+#Create data handle and booster
+handle.data <- lgbm.data.create(x)
+```
+
+```
+## Error in lgbm.data.create(x): could not find function "lgbm.data.create"
+```
+
+``` r
+lgbm.data.setField(handle.data, "label", y)
+```
+
+```
+## Error in lgbm.data.setField(handle.data, "label", y): could not find function "lgbm.data.setField"
+```
+
+``` r
+handle.booster <- lgbm.booster.create(handle.data, lapply(config, as.character))
+```
+
+```
+## Error in lgbm.booster.create(handle.data, lapply(config, as.character)): could not find function "lgbm.booster.create"
+```
+
+``` r
+#Train for num_iterations iterations and eval every 5 steps
+
+lgbm.booster.train(handle.booster, num_iterations, 5)
+```
+
+```
+## Error in lgbm.booster.train(handle.booster, num_iterations, 5): could not find function "lgbm.booster.train"
+```
+
+``` r
+#Predict
+pred <- lgbm.booster.predict(handle.booster, x.test)
+```
+
+```
+## Error in lgbm.booster.predict(handle.booster, x.test): could not find function "lgbm.booster.predict"
+```
+
+``` r
+#Test accuracy
+sum(y.test == (y.pred > 0.5)) / length(y.test)
+```
+
+```
+## Error: object 'y.test' not found
+```
+
+``` r
+#Save model (can be loaded again via lgbm.booster.load(filename))
+lgbm.booster.save(handle.booster, filename = "/tmp/model.txt")
+```
+
+```
+## Error in lgbm.booster.save(handle.booster, filename = "/tmp/model.txt"): could not find function "lgbm.booster.save"
+```
+
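+`RLightGBM` is not on CRAN; the officially supported R package is `lightgbm`, whose API differs from the calls above. A minimal sketch of the same workflow, assuming the CRAN `lightgbm` package and a small synthetic binary-classification problem:
+
+``` r
+# LightGBM sketch: assumes the CRAN `lightgbm` package; data are synthetic
+library(lightgbm)
+
+set.seed(7)
+n <- 500
+x <- matrix(rnorm(n * 5), ncol = 5)
+y <- as.integer(x[, 1] + 0.5 * x[, 2] + rnorm(n) > 0)
+
+train_idx <- sample(n, 0.8 * n)
+dtrain <- lgb.Dataset(data = x[train_idx, ], label = y[train_idx])
+
+params <- list(objective = "binary", metric = "auc",
+               learning_rate = 0.1, num_leaves = 31)
+
+bst <- lgb.train(params = params, data = dtrain, nrounds = 50)
+
+pred <- predict(bst, x[-train_idx, ])  # predicted probabilities
+mean((pred > 0.5) == y[-train_idx])    # test accuracy
+```
+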
diff --git a/Desktop/open-source/R/documentation/linear_regression.html b/Desktop/open-source/R/documentation/linear_regression.html
new file mode 100644
index 00000000..ad04ea91
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linear_regression.html
@@ -0,0 +1,152 @@
+<!-- Rendered HTML version of documentation/linear_regression.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/linear_regression.md b/Desktop/open-source/R/documentation/linear_regression.md
new file mode 100644
index 00000000..600054e3
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linear_regression.md
@@ -0,0 +1,62 @@
+
+
+``` r
+# Load Train and Test datasets
+# Identify the feature and response variable(s); values must be numeric (vectors or matrices)
+x_train <- input_variables_values_training_datasets
+```
+
+```
+## Error: object 'input_variables_values_training_datasets' not found
+```
+
+``` r
+y_train <- target_variables_values_training_datasets
+```
+
+```
+## Error: object 'target_variables_values_training_datasets' not found
+```
+
+``` r
+x_test <- input_variables_values_test_datasets
+```
+
+```
+## Error: object 'input_variables_values_test_datasets' not found
+```
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Train the model using the training sets and check score
+linear <- lm(y_train ~ ., data = x)
+```
+
+```
+## Error in model.frame.default(formula = y_train ~ ., data = x, drop.unused.levels = TRUE): 'data' must be a data.frame, environment, or list
+```
+
+``` r
+summary(linear)
+```
+
+```
+## Error in summary(linear): object 'linear' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(linear,x_test)
+```
+
+```
+## Error in predict(linear, x_test): object 'linear' not found
+```
+
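+None of the placeholder objects above (`input_variables_values_training_datasets` and friends) exist, which is why every chunk errors. The same workflow runs with base R alone on the built-in `mtcars` data:
+
+``` r
+# Linear-regression sketch using only base R and mtcars (illustrative split)
+train <- mtcars[1:22, ]
+test  <- mtcars[23:32, ]
+
+# Regress fuel efficiency on all remaining columns
+linear <- lm(mpg ~ ., data = train)
+summary(linear)
+
+predicted <- predict(linear, newdata = test)
+sqrt(mean((predicted - test$mpg)^2))  # test RMSE
+```
+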
diff --git a/Desktop/open-source/R/documentation/linear_search.html b/Desktop/open-source/R/documentation/linear_search.html
new file mode 100644
index 00000000..7eb9157e
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linear_search.html
@@ -0,0 +1,142 @@
+<!-- Rendered HTML version of documentation/linear_search.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/linear_search.md b/Desktop/open-source/R/documentation/linear_search.md
new file mode 100644
index 00000000..7ce26245
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linear_search.md
@@ -0,0 +1,28 @@
+
+
+``` r
+# Linear search: scan the vector left to right and return the 1-based position
+# of search_value, or -1 if it is not present
+linear_search <- function(vector, search_value) {
+  for (i in seq_along(vector)) {
+    if (vector[i] == search_value) {  # compare each element with the target value
+      return(i)
+    }
+  }
+  return(-1)
+}
+
+user_vec <- c(10, 20, 30, 40, 50, 60)  # hard-coded input vector
+user_val <- 30                         # hard-coded value to search for
+
+result <- linear_search(user_vec, user_val)
+
+if (result != -1) {
+  cat("Searched value", user_val, "found at index", result - 1)  # reported as a 0-based index
+} else {
+  cat("Searched value does not exist in array")
+}
+```
+
+```
+## Searched value 30 found at index 2
+```
+
diff --git a/Desktop/open-source/R/documentation/linearregressionrawr.html b/Desktop/open-source/R/documentation/linearregressionrawr.html
new file mode 100644
index 00000000..11895f9c
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linearregressionrawr.html
@@ -0,0 +1,133 @@
+<!-- Rendered HTML version of documentation/linearregressionrawr.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/linearregressionrawr.md b/Desktop/open-source/R/documentation/linearregressionrawr.md
new file mode 100644
index 00000000..8a3180a5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/linearregressionrawr.md
@@ -0,0 +1,19 @@
+
+
+``` r
+ols<-function(y,x){
+ data<-model.matrix(y ~ ., data = x)
+ decomp <- svd(data)
+ return(decomp$v %*% diag(1 / decomp$d) %*% t(decomp$u) %*% y)
+ }
+
+set.seed(1)
+x <- rnorm(1000)
+y <- 4 * x + rnorm(1000, sd = .5)
+ols(y=y,x=matrix(x, ncol = 1))
+```
+
+```
+## Error in terms.formula(object, data = data): '.' in formula and no 'data' argument
+```
+
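+The call fails because `x` is a plain matrix, so `model.matrix()` cannot expand the `.` in the formula. Converting the predictors to a data frame, and leaving the SVD-based solver unchanged, gives a runnable sketch:
+
+``` r
+# Least squares via SVD; sketch of a fix for the matrix/data-frame issue above
+ols <- function(y, x) {
+  x <- as.data.frame(x)
+  design <- model.matrix(~ ., data = x)  # adds the intercept column
+  decomp <- svd(design)
+  decomp$v %*% diag(1 / decomp$d, length(decomp$d)) %*% t(decomp$u) %*% y
+}
+
+set.seed(1)
+x <- rnorm(1000)
+y <- 4 * x + rnorm(1000, sd = 0.5)
+ols(y = y, x = matrix(x, ncol = 1))  # intercept near 0, slope near 4
+```
+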
diff --git a/Desktop/open-source/R/documentation/logistic_regression.html b/Desktop/open-source/R/documentation/logistic_regression.html
new file mode 100644
index 00000000..36256448
--- /dev/null
+++ b/Desktop/open-source/R/documentation/logistic_regression.html
@@ -0,0 +1,138 @@
+<!-- Rendered HTML version of documentation/logistic_regression.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/logistic_regression.md b/Desktop/open-source/R/documentation/logistic_regression.md
new file mode 100644
index 00000000..5a938752
--- /dev/null
+++ b/Desktop/open-source/R/documentation/logistic_regression.md
@@ -0,0 +1,36 @@
+
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Train the model using the training sets and check score
+logistic <- glm(y_train ~ ., data = x,family='binomial')
+```
+
+```
+## Error in model.frame.default(formula = y_train ~ ., data = x, drop.unused.levels = TRUE): 'data' must be a data.frame, environment, or list
+```
+
+``` r
+summary(logistic)
+```
+
+```
+## Error in summary(logistic): object 'logistic' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(logistic,x_test)
+```
+
+```
+## Error in predict(logistic, x_test): object 'logistic' not found
+```
+
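+As with the linear-regression example, the chunks fail only because `x_train`/`y_train` are never defined. A runnable base-R sketch using the built-in `mtcars` data, with transmission type `am` as the binary response:
+
+``` r
+# Logistic-regression sketch with base R: P(manual transmission) as a function of weight
+logistic <- glm(am ~ wt, data = mtcars, family = binomial)
+summary(logistic)
+
+# type = "response" returns probabilities rather than log-odds
+predict(logistic, newdata = data.frame(wt = c(2.5, 3.5)), type = "response")
+```
+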
diff --git a/Desktop/open-source/R/documentation/logistic_regression_2.html b/Desktop/open-source/R/documentation/logistic_regression_2.html
new file mode 100644
index 00000000..e1d88cb4
--- /dev/null
+++ b/Desktop/open-source/R/documentation/logistic_regression_2.html
@@ -0,0 +1,137 @@
+<!-- Rendered HTML version of documentation/logistic_regression_2.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/logistic_regression_2.md b/Desktop/open-source/R/documentation/logistic_regression_2.md
new file mode 100644
index 00000000..6418465c
--- /dev/null
+++ b/Desktop/open-source/R/documentation/logistic_regression_2.md
@@ -0,0 +1,31 @@
+
+
+``` r
+# Introduction to logistic regression
+
+# glm stands for Generalized Linear Model
+mod1 <- glm(y_data~x_data, data=name_of_the_dataframe, family="binomial")
+```
+
+```
+## Error in is.data.frame(data): object 'name_of_the_dataframe' not found
+```
+
+``` r
+# Display a summary of the fitted model (coefficients, significance, deviance)
+summary(mod1)
+```
+
+```
+## Error in summary(mod1): object 'mod1' not found
+```
+
+``` r
+# Predict the response for x_test_data using the fitted model
+predicted <- predict(mod1, x_test_data)
+```
+
+```
+## Error in predict(mod1, x_test_data): object 'mod1' not found
+```
+
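+A concrete stand-in for the placeholders above, using a small synthetic data frame so the three chunks actually run:
+
+``` r
+# Synthetic data standing in for `name_of_the_dataframe`
+set.seed(123)
+name_of_the_dataframe <- data.frame(x_data = rnorm(100))
+name_of_the_dataframe$y_data <- rbinom(100, 1, plogis(1.5 * name_of_the_dataframe$x_data))
+
+mod1 <- glm(y_data ~ x_data, data = name_of_the_dataframe, family = "binomial")
+summary(mod1)
+
+# Predicted probabilities for new x values
+x_test_data <- data.frame(x_data = c(-1, 0, 1))
+predicted <- predict(mod1, x_test_data, type = "response")
+predicted
+```
+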
diff --git a/Desktop/open-source/R/documentation/longest_common_subsequence.html b/Desktop/open-source/R/documentation/longest_common_subsequence.html
new file mode 100644
index 00000000..cb059c0a
--- /dev/null
+++ b/Desktop/open-source/R/documentation/longest_common_subsequence.html
@@ -0,0 +1,647 @@
+<!-- Rendered HTML version of documentation/longest_common_subsequence.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/longest_common_subsequence.md b/Desktop/open-source/R/documentation/longest_common_subsequence.md
new file mode 100644
index 00000000..b0eb4b87
--- /dev/null
+++ b/Desktop/open-source/R/documentation/longest_common_subsequence.md
@@ -0,0 +1,703 @@
+
+
+``` r
+# Longest Common Subsequence (LCS) Algorithm
+#
+# The LCS problem finds the longest subsequence common to two sequences.
+# A subsequence is a sequence derived from another sequence by deleting some
+# or no elements without changing the order of the remaining elements.
+#
+# Time Complexity: O(m * n) where m, n are lengths of the sequences
+# Space Complexity: O(m * n) for the DP table, O(min(m, n)) optimized version
+#
+# Applications:
+# - DNA sequence analysis in bioinformatics
+# - File difference utilities (diff command)
+# - Version control systems (git diff)
+# - Plagiarism detection
+# - Data compression algorithms
+# - Edit distance calculations
+
+# Basic LCS algorithm with full DP table
+lcs_length <- function(str1, str2) {
+ #' Find the length of longest common subsequence
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: Length of LCS
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ return(dp[m + 1, n + 1])
+}
+
+# LCS algorithm that returns the actual subsequence
+lcs_string <- function(str1, str2) {
+ #' Find the longest common subsequence string
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: List containing LCS string and its length
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Backtrack to find the actual LCS string
+ lcs <- ""
+ i <- m + 1
+ j <- n + 1
+
+ while (i > 1 && j > 1) {
+ if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ lcs <- paste0(substr(str1, i - 1, i - 1), lcs)
+ i <- i - 1
+ j <- j - 1
+ } else if (dp[i - 1, j] > dp[i, j - 1]) {
+ i <- i - 1
+ } else {
+ j <- j - 1
+ }
+ }
+
+ return(list(
+ lcs = lcs,
+ length = dp[m + 1, n + 1],
+ dp_table = dp
+ ))
+}
+
+# Space-optimized LCS (only returns length)
+lcs_length_optimized <- function(str1, str2) {
+ #' Space-optimized LCS length calculation
+ #' Uses only O(min(m, n)) space instead of O(m * n)
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: Length of LCS
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Make str1 the shorter string for space optimization
+ if (m > n) {
+ temp <- str1
+ str1 <- str2
+ str2 <- temp
+ temp <- m
+ m <- n
+ n <- temp
+ }
+
+ # Use two arrays instead of full matrix
+ prev <- rep(0, m + 1)
+ curr <- rep(0, m + 1)
+
+ for (j in 1:(n + 1)) {
+ for (i in 1:(m + 1)) {
+ if (i == 1 || j == 1) {
+ curr[i] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ curr[i] <- prev[i - 1] + 1
+ } else {
+ curr[i] <- max(prev[i], curr[i - 1])
+ }
+ }
+ # Swap arrays
+ temp <- prev
+ prev <- curr
+ curr <- temp
+ }
+
+ return(prev[m + 1])
+}
+
+# Find all possible LCS strings (there can be multiple)
+find_all_lcs <- function(str1, str2) {
+ #' Find all possible longest common subsequences
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: List of all LCS strings
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Recursive function to find all LCS
+ find_all_lcs_recursive <- function(i, j) {
+ if (i == 1 || j == 1) {
+ return("")
+ }
+
+ if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ char <- substr(str1, i - 1, i - 1)
+ prev_lcs <- find_all_lcs_recursive(i - 1, j - 1)
+ return(paste0(prev_lcs, char))
+ } else {
+ results <- c()
+
+ if (dp[i - 1, j] == dp[i, j]) {
+ results <- c(results, find_all_lcs_recursive(i - 1, j))
+ }
+
+ if (dp[i, j - 1] == dp[i, j]) {
+ results <- c(results, find_all_lcs_recursive(i, j - 1))
+ }
+
+ return(unique(results))
+ }
+ }
+
+ all_lcs <- find_all_lcs_recursive(m + 1, n + 1)
+ return(unique(all_lcs))
+}
+
+# LCS for arrays/vectors instead of strings
+lcs_array <- function(arr1, arr2) {
+ #' Find LCS of two arrays/vectors
+ #' @param arr1: First array
+ #' @param arr2: Second array
+ #' @return: List with LCS array and length
+
+ m <- length(arr1)
+ n <- length(arr2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (arr1[i - 1] == arr2[j - 1]) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Backtrack to find the actual LCS
+ lcs <- c()
+ i <- m + 1
+ j <- n + 1
+
+ while (i > 1 && j > 1) {
+ if (arr1[i - 1] == arr2[j - 1]) {
+ lcs <- c(arr1[i - 1], lcs)
+ i <- i - 1
+ j <- j - 1
+ } else if (dp[i - 1, j] > dp[i, j - 1]) {
+ i <- i - 1
+ } else {
+ j <- j - 1
+ }
+ }
+
+ return(list(
+ lcs = lcs,
+ length = dp[m + 1, n + 1]
+ ))
+}
+
+# Function to print the DP table (for educational purposes)
+print_dp_table <- function(str1, str2, dp_table) {
+ #' Print the DP table in a readable format
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @param dp_table: DP table from lcs_string function
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ cat("DP Table for LCS calculation:\n")
+ cat("String 1:", str1, "\n")
+ cat("String 2:", str2, "\n\n")
+
+ # Print column headers
+ cat(" ε ")
+ for (j in 1:n) {
+ cat(sprintf("%2s ", substr(str2, j, j)))
+ }
+ cat("\n")
+
+ # Print table rows
+ for (i in 1:(m + 1)) {
+ if (i == 1) {
+ cat(" ε ")
+ } else {
+ cat(sprintf("%2s ", substr(str1, i - 1, i - 1)))
+ }
+
+ for (j in 1:(n + 1)) {
+ cat(sprintf("%2d ", dp_table[i, j]))
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# Example usage and testing
+cat("=== Longest Common Subsequence (LCS) Algorithm ===\n\n")
+```
+
+```
+## === Longest Common Subsequence (LCS) Algorithm ===
+```
+
+``` r
+# Test 1: Basic LCS example
+cat("1. Basic LCS Example\n")
+```
+
+```
+## 1. Basic LCS Example
+```
+
+``` r
+str1 <- "ABCDGH"
+str2 <- "AEDFHR"
+
+cat("String 1:", str1, "\n")
+```
+
+```
+## String 1: ABCDGH
+```
+
+``` r
+cat("String 2:", str2, "\n")
+```
+
+```
+## String 2: AEDFHR
+```
+
+``` r
+result1 <- lcs_string(str1, str2)
+cat("LCS:", result1$lcs, "\n")
+```
+
+```
+## LCS: ADH
+```
+
+``` r
+cat("Length:", result1$length, "\n")
+```
+
+```
+## Length: 3
+```
+
+``` r
+print_dp_table(str1, str2, result1$dp_table)
+```
+
+```
+## DP Table for LCS calculation:
+## String 1: ABCDGH
+## String 2: AEDFHR
+##
+## ε A E D F H R
+## ε 0 0 0 0 0 0 0
+## A 0 1 1 1 1 1 1
+## B 0 1 1 1 1 1 1
+## C 0 1 1 1 1 1 1
+## D 0 1 1 2 2 2 2
+## G 0 1 1 2 2 2 2
+## H 0 1 1 2 2 3 3
+```
+
+``` r
+# Test 2: DNA sequence analysis
+cat("2. DNA Sequence Analysis\n")
+```
+
+```
+## 2. DNA Sequence Analysis
+```
+
+``` r
+dna1 <- "ATCGATCGATCG"
+dna2 <- "ATGCGATGCATG"
+
+cat("DNA Sequence 1:", dna1, "\n")
+```
+
+```
+## DNA Sequence 1: ATCGATCGATCG
+```
+
+``` r
+cat("DNA Sequence 2:", dna2, "\n")
+```
+
+```
+## DNA Sequence 2: ATGCGATGCATG
+```
+
+``` r
+dna_result <- lcs_string(dna1, dna2)
+cat("Common subsequence:", dna_result$lcs, "\n")
+```
+
+```
+## Common subsequence: ATCGATGATG
+```
+
+``` r
+cat("Length:", dna_result$length, "\n")
+```
+
+```
+## Length: 10
+```
+
+``` r
+cat("Similarity:", sprintf("%.2f%%", dna_result$length / max(nchar(dna1), nchar(dna2)) * 100), "\n\n")
+```
+
+```
+## Similarity: 83.33%
+```
+
+``` r
+# Test 3: Finding all possible LCS
+cat("3. Multiple LCS Solutions\n")
+```
+
+```
+## 3. Multiple LCS Solutions
+```
+
+``` r
+str3 <- "ABCDEF"
+str4 <- "ACBDEF"
+
+cat("String 1:", str3, "\n")
+```
+
+```
+## String 1: ABCDEF
+```
+
+``` r
+cat("String 2:", str4, "\n")
+```
+
+```
+## String 2: ACBDEF
+```
+
+``` r
+all_lcs <- find_all_lcs(str3, str4)
+cat("All possible LCS:\n")
+```
+
+```
+## All possible LCS:
+```
+
+``` r
+for (i in seq_along(all_lcs)) {
+ cat(" ", i, ":", all_lcs[i], "\n")
+}
+```
+
+```
+## 1 : ABDEF
+## 2 : ACDEF
+```
+
+``` r
+cat("\n")
+```
+
+``` r
+# Test 4: Array LCS example
+cat("4. Array LCS Example\n")
+```
+
+```
+## 4. Array LCS Example
+```
+
+``` r
+arr1 <- c(1, 2, 3, 4, 5)
+arr2 <- c(2, 3, 5, 7, 8)
+
+cat("Array 1:", paste(arr1, collapse = ", "), "\n")
+```
+
+```
+## Array 1: 1, 2, 3, 4, 5
+```
+
+``` r
+cat("Array 2:", paste(arr2, collapse = ", "), "\n")
+```
+
+```
+## Array 2: 2, 3, 5, 7, 8
+```
+
+``` r
+arr_result <- lcs_array(arr1, arr2)
+cat("LCS Array:", paste(arr_result$lcs, collapse = ", "), "\n")
+```
+
+```
+## LCS Array: 2, 3, 5
+```
+
+``` r
+cat("Length:", arr_result$length, "\n\n")
+```
+
+```
+## Length: 3
+```
+
+``` r
+# Test 5: Performance comparison
+cat("5. Performance Comparison\n")
+```
+
+```
+## 5. Performance Comparison
+```
+
+``` r
+long_str1 <- paste(sample(LETTERS[1:5], 100, replace = TRUE), collapse = "")
+long_str2 <- paste(sample(LETTERS[1:5], 100, replace = TRUE), collapse = "")
+
+cat("Testing with strings of length 100...\n")
+```
+
+```
+## Testing with strings of length 100...
+```
+
+``` r
+# Standard algorithm
+start_time <- Sys.time()
+standard_result <- lcs_length(long_str1, long_str2)
+standard_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+# Optimized algorithm
+start_time <- Sys.time()
+optimized_result <- lcs_length_optimized(long_str1, long_str2)
+optimized_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Standard algorithm: LCS length =", standard_result,
+ "Time:", sprintf("%.6f", standard_time), "seconds\n")
+```
+
+```
+## Standard algorithm: LCS length = 59 Time: 0.052980 seconds
+```
+
+``` r
+cat("Optimized algorithm: LCS length =", optimized_result,
+ "Time:", sprintf("%.6f", optimized_time), "seconds\n")
+```
+
+```
+## Optimized algorithm: LCS length = 59 Time: 0.049993 seconds
+```
+
+``` r
+cat("Results match:", standard_result == optimized_result, "\n\n")
+```
+
+```
+## Results match: TRUE
+```
+
+``` r
+# Test 6: Edge cases
+cat("6. Edge Cases\n")
+```
+
+```
+## 6. Edge Cases
+```
+
+``` r
+cat("Empty strings:", lcs_length("", "ABC"), "\n")
+```
+
+```
+## Empty strings: 0
+```
+
+``` r
+cat("One empty string:", lcs_length("ABC", ""), "\n")
+```
+
+```
+## One empty string: 0
+```
+
+``` r
+cat("Identical strings:", lcs_length("HELLO", "HELLO"), "\n")
+```
+
+```
+## Identical strings: 5
+```
+
+``` r
+cat("No common characters:", lcs_length("ABC", "DEF"), "\n")
+```
+
+```
+## No common characters: 0
+```
+
+``` r
+cat("Single character:", lcs_length("A", "A"), "\n")
+```
+
+```
+## Single character: 1
+```
+
+``` r
+cat("Single vs multiple:", lcs_length("A", "ABCDEF"), "\n\n")
+```
+
+```
+## Single vs multiple: 1
+```
+
+``` r
+# Test 7: Real-world example - File diff simulation
+cat("7. File Diff Simulation\n")
+```
+
+```
+## 7. File Diff Simulation
+```
+
+``` r
+file1_lines <- c("Hello World", "This is line 2", "Line 3 here", "Final line")
+file2_lines <- c("Hello World", "This is modified line 2", "Line 3 here", "New line", "Final line")
+
+cat("File 1 lines:\n")
+```
+
+```
+## File 1 lines:
+```
+
+``` r
+for (i in seq_along(file1_lines)) {
+ cat(" ", i, ":", file1_lines[i], "\n")
+}
+```
+
+```
+## 1 : Hello World
+## 2 : This is line 2
+## 3 : Line 3 here
+## 4 : Final line
+```
+
+``` r
+cat("File 2 lines:\n")
+```
+
+```
+## File 2 lines:
+```
+
+``` r
+for (i in seq_along(file2_lines)) {
+ cat(" ", i, ":", file2_lines[i], "\n")
+}
+```
+
+```
+## 1 : Hello World
+## 2 : This is modified line 2
+## 3 : Line 3 here
+## 4 : New line
+## 5 : Final line
+```
+
+``` r
+file_lcs <- lcs_array(file1_lines, file2_lines)
+cat("Common lines (unchanged):\n")
+```
+
+```
+## Common lines (unchanged):
+```
+
+``` r
+for (i in seq_along(file_lcs$lcs)) {
+ cat(" ", file_lcs$lcs[i], "\n")
+}
+```
+
+```
+## Hello World
+## Line 3 here
+## Final line
+```
+
+``` r
+cat("Similarity:", sprintf("%.1f%%", file_lcs$length / max(length(file1_lines), length(file2_lines)) * 100), "\n")
+```
+
+```
+## Similarity: 60.0%
+```
+
diff --git a/Desktop/open-source/R/documentation/longest_increasing_subsequence.html b/Desktop/open-source/R/documentation/longest_increasing_subsequence.html
new file mode 100644
index 00000000..100ae952
--- /dev/null
+++ b/Desktop/open-source/R/documentation/longest_increasing_subsequence.html
@@ -0,0 +1,458 @@
+<!-- Rendered HTML version of documentation/longest_increasing_subsequence.md; the content is identical to the Markdown file below. -->
diff --git a/Desktop/open-source/R/documentation/longest_increasing_subsequence.md b/Desktop/open-source/R/documentation/longest_increasing_subsequence.md
new file mode 100644
index 00000000..671f6663
--- /dev/null
+++ b/Desktop/open-source/R/documentation/longest_increasing_subsequence.md
@@ -0,0 +1,470 @@
+
+
+``` r
+# Longest Increasing Subsequence (Dynamic Programming)
+#
+# The Longest Increasing Subsequence (LIS) problem is a classic dynamic programming problem.
+# Given an array of integers, find the length of the longest subsequence that is strictly
+# increasing. A subsequence is derived from the array by deleting some or no elements
+# without changing the order of the remaining elements.
+#
+# Time Complexity: O(n²) for basic DP, O(n log n) for optimized binary search version
+# Space Complexity: O(n) for both approaches
+#
+# Applications:
+# - Bioinformatics (DNA sequence analysis)
+# - Stock market analysis (longest upward trend)
+# - Scheduling problems
+# - Game theory (optimal play sequences)
+# - Data compression and pattern recognition
+
+# Basic DP solution for Longest Increasing Subsequence
+longest_increasing_subsequence <- function(nums) {
+ #' Find the length of the longest increasing subsequence using Dynamic Programming
+ #' @param nums: Numeric vector of integers
+ #' @return: List containing max length, DP array, and one possible LIS
+
+ n <- length(nums)
+
+ # Handle edge cases
+ if (n == 0) {
+ return(list(
+ max_length = 0,
+ dp_array = c(),
+ lis_sequence = c(),
+ dp_table = c()
+ ))
+ }
+
+ if (n == 1) {
+ return(list(
+ max_length = 1,
+ dp_array = c(1),
+ lis_sequence = nums,
+ dp_table = c(1)
+ ))
+ }
+
+ # Initialize DP array: dp[i] = length of LIS ending at index i
+ dp <- rep(1, n)
+
+ # Fill DP array
+ for (i in 2:n) {
+ for (j in 1:(i - 1)) {
+ if (nums[j] < nums[i]) {
+ dp[i] <- max(dp[i], dp[j] + 1)
+ }
+ }
+ }
+
+ # Find maximum length
+ max_length <- max(dp)
+
+ # Backtrack to find one possible LIS
+ lis_sequence <- c()
+ current_length <- max_length
+
+ for (i in n:1) {
+ if (dp[i] == current_length) {
+ lis_sequence <- c(nums[i], lis_sequence)
+ current_length <- current_length - 1
+ if (current_length == 0) break
+ }
+ }
+
+ return(list(
+ max_length = max_length,
+ dp_array = dp,
+ lis_sequence = lis_sequence,
+ dp_table = dp
+ ))
+}
+
+# Optimized O(n log n) solution using binary search
+longest_increasing_subsequence_optimized <- function(nums) {
+ #' Find the length of the longest increasing subsequence using binary search
+ #' @param nums: Numeric vector of integers
+ #' @return: Length of the longest increasing subsequence
+
+ n <- length(nums)
+
+ if (n == 0) return(0)
+ if (n == 1) return(1)
+
+ # tails[i] stores the smallest tail of all increasing subsequences of length i+1
+ tails <- c()
+
+ for (num in nums) {
+ # Binary search for the position to replace or extend
+ pos <- binary_search_insert_position(tails, num)
+
+ if (pos > length(tails)) {
+ # Extend the sequence
+ tails <- c(tails, num)
+ } else {
+ # Replace the element at position pos
+ tails[pos] <- num
+ }
+ }
+
+ return(length(tails))
+}
+
+# Helper function for binary search
+binary_search_insert_position <- function(arr, target) {
+ #' Binary search to find the position where target should be inserted
+ #' @param arr: Sorted numeric vector
+ #' @param target: Value to insert
+ #' @return: Position (1-indexed) where target should be inserted
+
+ if (length(arr) == 0) return(1)
+
+ left <- 1
+ right <- length(arr)
+
+ while (left <= right) {
+ mid <- left + (right - left) %/% 2
+
+ if (arr[mid] < target) {
+ left <- mid + 1
+ } else {
+ right <- mid - 1
+ }
+ }
+
+ return(left)
+}
+
+# Function to find all possible LIS sequences (simplified version)
+find_all_lis <- function(nums) {
+ #' Find all possible longest increasing subsequences
+ #' @param nums: Numeric vector of integers
+ #' @return: List of all possible LIS sequences
+
+ n <- length(nums)
+ if (n == 0) return(list())
+
+ # Calculate DP array
+ dp <- rep(1, n)
+ for (i in 2:n) {
+ for (j in 1:(i - 1)) {
+ if (nums[j] < nums[i]) {
+ dp[i] <- max(dp[i], dp[j] + 1)
+ }
+ }
+ }
+
+ max_length <- max(dp)
+
+ # For simplicity, return just one LIS (same as the main function)
+ # Finding all possible LIS is complex and not essential for the algorithm demonstration
+ result <- longest_increasing_subsequence(nums)
+ return(list(result$lis_sequence))
+}
+
+# Helper function to print DP table
+print_lis_dp <- function(dp_array, nums) {
+ cat("DP Array for Longest Increasing Subsequence:\n")
+ cat("Input Array:", paste(nums, collapse = ", "), "\n")
+ cat("DP Array :", paste(dp_array, collapse = ", "), "\n")
+ cat("Max Length :", max(dp_array), "\n\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== Longest Increasing Subsequence (Dynamic Programming) ===\n\n")
+```
+
+```
+## === Longest Increasing Subsequence (Dynamic Programming) ===
+```
+
+``` r
+# Test 1: Basic Example
+nums1 <- c(10, 9, 2, 5, 3, 7, 101, 18)
+cat("Test 1: Basic Example\n")
+```
+
+```
+## Test 1: Basic Example
+```
+
+``` r
+cat("Input Array:", paste(nums1, collapse = ", "), "\n\n")
+```
+
+```
+## Input Array: 10, 9, 2, 5, 3, 7, 101, 18
+```
+
+``` r
+result1 <- longest_increasing_subsequence(nums1)
+print_lis_dp(result1$dp_array, nums1)
+```
+
+```
+## DP Array for Longest Increasing Subsequence:
+## Input Array: 10, 9, 2, 5, 3, 7, 101, 18
+## DP Array : 1, 1, 1, 2, 2, 3, 4, 4
+## Max Length : 4
+```
+
+``` r
+cat("Maximum Length:", result1$max_length, "\n")
+```
+
+```
+## Maximum Length: 4
+```
+
+``` r
+cat("One LIS Sequence:", paste(result1$lis_sequence, collapse = ", "), "\n\n")
+```
+
+```
+## One LIS Sequence: 2, 3, 7, 18
+```
+
+``` r
+# Test 2: Optimized Version
+cat("Test 2: Optimized O(n log n) Version\n")
+```
+
+```
+## Test 2: Optimized O(n log n) Version
+```
+
+``` r
+max_len_opt <- longest_increasing_subsequence_optimized(nums1)
+cat("Maximum Length (Optimized):", max_len_opt, "\n")
+```
+
+```
+## Maximum Length (Optimized): 4
+```
+
+``` r
+cat("Verification: Both methods match:", result1$max_length == max_len_opt, "\n\n")
+```
+
+```
+## Verification: Both methods match: TRUE
+```
+
+``` r
+# Test 3: All Possible LIS
+cat("Test 3: All Possible LIS Sequences\n")
+```
+
+```
+## Test 3: All Possible LIS Sequences
+```
+
+``` r
+all_lis <- find_all_lis(nums1)
+cat("Total number of LIS sequences:", length(all_lis), "\n")
+```
+
+```
+## Total number of LIS sequences: 1
+```
+
+``` r
+for (i in seq_along(all_lis)) {
+ cat("LIS", i, ":", paste(all_lis[[i]], collapse = ", "), "\n")
+}
+```
+
+```
+## LIS 1 : 2, 3, 7, 18
+```
+
+``` r
+cat("\n")
+```
+
+``` r
+# Test 4: Edge Cases
+cat("Test 4: Edge Cases\n")
+```
+
+```
+## Test 4: Edge Cases
+```
+
+``` r
+cat("Empty array:", longest_increasing_subsequence(c())$max_length, "\n")
+```
+
+```
+## Empty array: 0
+```
+
+``` r
+cat("Single element:", longest_increasing_subsequence(c(5))$max_length, "\n")
+```
+
+```
+## Single element: 1
+```
+
+``` r
+cat("All same elements:", longest_increasing_subsequence(c(3, 3, 3, 3))$max_length, "\n")
+```
+
+```
+## All same elements: 1
+```
+
+``` r
+cat("Strictly decreasing:", longest_increasing_subsequence(c(5, 4, 3, 2, 1))$max_length, "\n")
+```
+
+```
+## Strictly decreasing: 1
+```
+
+``` r
+cat("Strictly increasing:", longest_increasing_subsequence(c(1, 2, 3, 4, 5))$max_length, "\n\n")
+```
+
+```
+## Strictly increasing: 5
+```
+
+``` r
+# Test 5: Larger Dataset
+cat("Test 5: Larger Dataset (n=20)\n")
+```
+
+```
+## Test 5: Larger Dataset (n=20)
+```
+
+``` r
+set.seed(42)
+nums_large <- sample(1:100, 20)
+cat("Input Array:", paste(nums_large, collapse = ", "), "\n\n")
+```
+
+```
+## Input Array: 49, 65, 25, 74, 18, 100, 47, 24, 71, 89, 37, 20, 26, 3, 41, 27, 36, 5, 34, 87
+```
+
+``` r
+result_large <- longest_increasing_subsequence(nums_large)
+cat("Maximum Length:", result_large$max_length, "\n")
+```
+
+```
+## Maximum Length: 6
+```
+
+``` r
+cat("One LIS Sequence:", paste(result_large$lis_sequence, collapse = ", "), "\n\n")
+```
+
+```
+## One LIS Sequence: 18, 20, 26, 27, 34, 87
+```
+
+``` r
+# Test 6: Performance Comparison
+cat("Test 6: Performance Comparison (n=1000)\n")
+```
+
+```
+## Test 6: Performance Comparison (n=1000)
+```
+
+``` r
+n <- 1000
+nums_perf <- sample(1:1000, n)
+
+start_time <- Sys.time()
+res_opt <- longest_increasing_subsequence_optimized(nums_perf)
+opt_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Optimized O(n log n) result:", res_opt, "\n")
+```
+
+```
+## Optimized O(n log n) result: 61
+```
+
+``` r
+cat("Time taken:", sprintf("%.4f sec", opt_time), "\n")
+```
+
+```
+## Time taken: 0.0189 sec
+```
+
+``` r
+# Verify correctness with basic DP (smaller sample for time comparison)
+nums_small <- nums_perf[1:100]
+start_time <- Sys.time()
+res_basic <- longest_increasing_subsequence(nums_small)
+basic_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Basic O(n²) result (n=100):", res_basic$max_length, "\n")
+```
+
+```
+## Basic O(n²) result (n=100): 18
+```
+
+``` r
+cat("Time taken:", sprintf("%.4f sec", basic_time), "\n")
+```
+
+```
+## Time taken: 0.0020 sec
+```
+
+``` r
+# Test 7: Real-world Example - Stock Prices
+cat("Test 7: Real-world Example - Stock Price Trend\n")
+```
+
+```
+## Test 7: Real-world Example - Stock Price Trend
+```
+
+``` r
+stock_prices <- c(100, 102, 98, 105, 103, 107, 110, 108, 112, 115, 113, 118, 120, 117, 125)
+cat("Stock Prices:", paste(stock_prices, collapse = ", "), "\n")
+```
+
+```
+## Stock Prices: 100, 102, 98, 105, 103, 107, 110, 108, 112, 115, 113, 118, 120, 117, 125
+```
+
+``` r
+stock_result <- longest_increasing_subsequence(stock_prices)
+cat("Longest upward trend length:", stock_result$max_length, "\n")
+```
+
+```
+## Longest upward trend length: 10
+```
+
+``` r
+cat("Longest upward trend:", paste(stock_result$lis_sequence, collapse = ", "), "\n")
+```
+
+```
+## Longest upward trend: 100, 102, 103, 107, 108, 112, 113, 118, 120, 125
+```
+
+``` r
+cat("Percentage increase:",
+ sprintf("%.2f%%", (stock_result$lis_sequence[length(stock_result$lis_sequence)] /
+ stock_result$lis_sequence[1] - 1) * 100), "\n")
+```
+
+```
+## Percentage increase: 25.00%
+```
+
diff --git a/Desktop/open-source/R/documentation/multiple_linear_regression.html b/Desktop/open-source/R/documentation/multiple_linear_regression.html
new file mode 100644
index 00000000..c6b7a89d
--- /dev/null
+++ b/Desktop/open-source/R/documentation/multiple_linear_regression.html
@@ -0,0 +1,138 @@
+<!-- Rendered HTML version of documentation/multiple_linear_regression.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/multiple_linear_regression.md b/Desktop/open-source/R/documentation/multiple_linear_regression.md
new file mode 100644
index 00000000..fbfbfdbf
--- /dev/null
+++ b/Desktop/open-source/R/documentation/multiple_linear_regression.md
@@ -0,0 +1,32 @@
+
+
+``` r
+# Introduction to multiple linear regression
+
+# lm stands for Linear Model
+# y_data is modeled as a*x1 + b*x2 + c*x3 + d*x4 + e
+mod3 <- lm(y_data~x1_data+x2_data+x3_data+x4_data, data=name_of_the_dataframe)
+```
+
+```
+## Error in is.data.frame(data): object 'name_of_the_dataframe' not found
+```
+
+``` r
+# displays the output of the model computed by the previous line
+summary(mod3)
+```
+
+```
+## Error in summary(mod3): object 'mod3' not found
+```
+
+``` r
+# Modeled data: predict the output for the test inputs using the fitted model
+predicted <- predict(mod3, x1_test_data, x2_test_data, x3_test_data, x4_test_data)
+```
+
+```
+## Error in predict(mod3, x1_test_data, x2_test_data, x3_test_data, x4_test_data): object 'mod3' not found
+```
+
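+A minimal, runnable sketch of the same workflow on simulated data (the chunks
+above failed only because `name_of_the_dataframe` and the `*_data` objects were
+not defined when this document was rendered); note that `predict()` for an `lm`
+model expects a single `newdata` data frame rather than separate vectors:
+
+``` r
+# Simulated data, purely for illustration
+set.seed(1)
+df <- data.frame(x1_data = rnorm(100), x2_data = rnorm(100),
+                 x3_data = rnorm(100), x4_data = rnorm(100))
+df$y_data <- 2 * df$x1_data - df$x2_data + 0.5 * df$x3_data + rnorm(100)
+
+mod3 <- lm(y_data ~ x1_data + x2_data + x3_data + x4_data, data = df)
+summary(mod3)
+
+new_obs <- data.frame(x1_data = 0.1, x2_data = -0.2, x3_data = 0.3, x4_data = 0)
+predicted <- predict(mod3, newdata = new_obs)
+```
+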
diff --git a/Desktop/open-source/R/documentation/naive_bayes.html b/Desktop/open-source/R/documentation/naive_bayes.html
new file mode 100644
index 00000000..bfc05e38
--- /dev/null
+++ b/Desktop/open-source/R/documentation/naive_bayes.html
@@ -0,0 +1,142 @@
+<!-- Rendered HTML version of documentation/naive_bayes.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/naive_bayes.md b/Desktop/open-source/R/documentation/naive_bayes.md
new file mode 100644
index 00000000..ebbc79a9
--- /dev/null
+++ b/Desktop/open-source/R/documentation/naive_bayes.md
@@ -0,0 +1,44 @@
+
+
+``` r
+library(e1071)
+```
+
+```
+## Error in library(e1071): there is no package called 'e1071'
+```
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Fitting model
+fit <-naiveBayes(y_train ~ ., data = x)
+```
+
+```
+## Error in naiveBayes(y_train ~ ., data = x): could not find function "naiveBayes"
+```
+
+``` r
+summary(fit)
+```
+
+```
+## Error in summary(fit): object 'fit' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(fit,x_test)
+```
+
+```
+## Error in predict(fit, x_test): object 'fit' not found
+```
+
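+A minimal, runnable sketch using the built-in `iris` data, assuming the `e1071`
+package is installed (the chunks above failed because the package and the
+`x_train`/`y_train` objects were unavailable when this document was rendered):
+
+``` r
+library(e1071)
+
+# Train/test split of iris
+set.seed(123)
+train_idx <- sample(seq_len(nrow(iris)), 100)
+train <- iris[train_idx, ]
+test <- iris[-train_idx, ]
+
+# Fit a naive Bayes classifier and predict the held-out species
+fit <- naiveBayes(Species ~ ., data = train)
+predicted <- predict(fit, newdata = test)
+table(predicted, test$Species)
+```
+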
diff --git a/Desktop/open-source/R/documentation/pam.html b/Desktop/open-source/R/documentation/pam.html
new file mode 100644
index 00000000..7c4d3ed0
--- /dev/null
+++ b/Desktop/open-source/R/documentation/pam.html
@@ -0,0 +1,314 @@
+<!-- Rendered HTML version of documentation/pam.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/pam.md b/Desktop/open-source/R/documentation/pam.md
new file mode 100644
index 00000000..9428c3f1
--- /dev/null
+++ b/Desktop/open-source/R/documentation/pam.md
@@ -0,0 +1,200 @@
+
+
+``` r
+library(cluster)
+pam_fit <- pam(iris[, 1:4], 5) # Partition Around Medoids
+summary(pam_fit) # Get summary
+```
+
+```
+## Medoids:
+## ID Sepal.Length Sepal.Width Petal.Length Petal.Width
+## [1,] 8 5.0 3.4 1.5 0.2
+## [2,] 64 6.1 2.9 4.7 1.4
+## [3,] 70 5.6 2.5 3.9 1.1
+## [4,] 113 6.8 3.0 5.5 2.1
+## [5,] 106 7.6 3.0 6.6 2.1
+## Clustering vector:
+## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 2 2 2 3 2 3 3 2 3 2 3 2 2 3 2 3 2 3 2 2
+## [75] 2 2 2 4 2 3 3 3 3 2 2 2 2 2 3 3 3 2 3 3 3 3 3 2 3 3 4 2 4 4 4 5 3 5 4 5 4
+## [112] 4 4 2 2 4 4 5 5 2 4 2 5 2 4 4 2 2 4 4 5 5 4 2 2 5 4 4 2 4 4 4 2 4 4 4 2 4
+## [149] 4 2
+## Objective function:
+## build swap
+## 0.5520959 0.5272835
+##
+## Numerical information per cluster:
+## size max_diss av_diss diameter separation
+## [1,] 50 1.2369317 0.4846000 2.428992 1.6401219
+## [2,] 40 1.1224972 0.5874690 1.661325 0.3000000
+## [3,] 24 1.1000000 0.5205001 1.627882 0.3000000
+## [4,] 27 0.8660254 0.5077127 1.374773 0.3162278
+## [5,] 9 0.9643651 0.5737248 1.389244 0.4358899
+##
+## Isolated clusters:
+## L-clusters: character(0)
+## L*-clusters: character(0)
+##
+## Silhouette plot information:
+## cluster neighbor sil_width
+## 1 1 3 0.823782713
+## 8 1 3 0.822979939
+## 18 1 3 0.821191829
+## 50 1 3 0.820946901
+## 5 1 3 0.819989654
+## 41 1 3 0.819878967
+## 40 1 3 0.818905711
+## 29 1 3 0.812116253
+## 38 1 3 0.811246460
+## 28 1 3 0.810123090
+## 12 1 3 0.799834938
+## 36 1 3 0.798299736
+## 27 1 3 0.796367017
+## 3 1 3 0.793333297
+## 22 1 3 0.791981289
+## 35 1 3 0.789669653
+## 20 1 3 0.786689337
+## 10 1 3 0.785945881
+## 7 1 3 0.784292003
+## 49 1 3 0.784257843
+## 48 1 3 0.779752099
+## 47 1 3 0.777942371
+## 30 1 3 0.775746314
+## 2 1 3 0.773966636
+## 31 1 3 0.771500461
+## 13 1 3 0.768378249
+## 11 1 3 0.766919198
+## 46 1 3 0.766389968
+## 4 1 3 0.760528648
+## 32 1 3 0.757686027
+## 37 1 3 0.756698504
+## 44 1 3 0.756592820
+## 23 1 3 0.756110206
+## 26 1 3 0.750347697
+## 24 1 3 0.745215991
+## 43 1 3 0.741959676
+## 17 1 3 0.738559175
+## 21 1 3 0.737981532
+## 33 1 3 0.724060094
+## 25 1 3 0.721037402
+## 39 1 3 0.718527842
+## 6 1 3 0.702644402
+## 9 1 3 0.696132723
+## 14 1 3 0.695119096
+## 45 1 3 0.694338305
+## 34 1 3 0.681434226
+## 15 1 3 0.660358295
+## 19 1 3 0.655738366
+## 16 1 3 0.597147161
+## 42 1 3 0.555054870
+## 64 2 3 0.471339064
+## 55 2 4 0.458317613
+## 52 2 4 0.443534424
+## 92 2 3 0.433142280
+## 59 2 4 0.431621613
+## 139 2 4 0.417405565
+## 76 2 3 0.415948453
+## 127 2 4 0.402579655
+## 73 2 4 0.399050400
+## 66 2 4 0.388387404
+## 74 2 3 0.372099711
+## 71 2 4 0.366704189
+## 57 2 4 0.363901411
+## 84 2 4 0.349237676
+## 79 2 3 0.347927387
+## 128 2 4 0.344414971
+## 86 2 3 0.342879650
+## 120 2 3 0.339139851
+## 122 2 4 0.324164232
+## 124 2 4 0.310947933
+## 87 2 4 0.296111811
+## 75 2 3 0.294712248
+## 114 2 4 0.287220191
+## 102 2 4 0.276114979
+## 143 2 4 0.276114979
+## 134 2 4 0.274416839
+## 77 2 4 0.262524104
+## 150 2 4 0.250702974
+## 98 2 3 0.207008254
+## 69 2 3 0.206965943
+## 147 2 4 0.186361678
+## 51 2 4 0.158584073
+## 88 2 3 0.154974194
+## 67 2 3 0.080197066
+## 53 2 4 0.068704364
+## 135 2 4 0.009859737
+## 115 2 4 0.007411473
+## 62 2 3 -0.010322685
+## 56 2 3 -0.018480568
+## 85 2 3 -0.056548190
+## 81 3 2 0.593063391
+## 82 3 2 0.588486416
+## 70 3 2 0.577829255
+## 90 3 2 0.541572577
+## 80 3 2 0.540075551
+## 94 3 2 0.527900827
+## 58 3 2 0.518504963
+## 54 3 2 0.510585771
+## 60 3 2 0.509830210
+## 61 3 2 0.497940985
+## 65 3 2 0.497569203
+## 83 3 2 0.489146497
+## 93 3 2 0.469694889
+## 99 3 1 0.458807124
+## 68 3 2 0.409714352
+## 100 3 2 0.400450702
+## 95 3 2 0.396521625
+## 89 3 2 0.348880615
+## 63 3 2 0.336954121
+## 91 3 2 0.295905041
+## 97 3 2 0.281018209
+## 96 3 2 0.262894448
+## 107 3 2 0.186381085
+## 72 3 2 0.161825540
+## 113 4 2 0.577935875
+## 105 4 5 0.577360661
+## 141 4 5 0.575917221
+## 125 4 5 0.550165260
+## 140 4 2 0.541953152
+## 133 4 2 0.496625982
+## 145 4 5 0.487427796
+## 137 4 2 0.486932219
+## 129 4 2 0.484485212
+## 121 4 5 0.461832625
+## 146 4 2 0.455480506
+## 116 4 2 0.450650676
+## 117 4 2 0.425816992
+## 101 4 5 0.425555783
+## 142 4 2 0.407031174
+## 144 4 5 0.401369810
+## 138 4 2 0.394312582
+## 149 4 2 0.388080059
+## 109 4 2 0.374157348
+## 104 4 2 0.351135503
+## 148 4 2 0.328372054
+## 111 4 2 0.256998908
+## 112 4 2 0.192307087
+## 103 4 5 0.154141472
+## 130 4 5 0.105352310
+## 78 4 2 -0.016348526
+## 126 4 5 -0.082878191
+## 106 5 4 0.565971497
+## 123 5 4 0.546749956
+## 119 5 4 0.477509712
+## 118 5 4 0.459700515
+## 132 5 4 0.439388428
+## 136 5 4 0.409060616
+## 108 5 4 0.312571293
+## 131 5 4 0.253594219
+## 110 5 4 0.082867697
+## Average silhouette width per cluster:
+## [1] 0.7575140 0.2733844 0.4333981 0.3797101 0.3941571
+## Average silhouette width of total data set:
+## [1] 0.4867481
+##
+## Available components:
+## [1] "medoids" "id.med" "clustering" "objective" "isolation"
+## [6] "clusinfo" "silinfo" "diss" "call" "data"
+```
+
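+Since `k = 5` above was chosen arbitrarily, a common follow-up is to compare the
+average silhouette width across several values of `k` with the same
+`cluster::pam()` interface; a brief sketch (not evaluated here):
+
+``` r
+library(cluster)
+
+# Average silhouette width for k = 2..6 clusters on the iris measurements
+avg_sil <- sapply(2:6, function(k) pam(iris[, 1:4], k)$silinfo$avg.width)
+names(avg_sil) <- 2:6
+avg_sil # the k with the largest value is usually preferred
+```
+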
diff --git a/Desktop/open-source/R/documentation/random_forest.html b/Desktop/open-source/R/documentation/random_forest.html
new file mode 100644
index 00000000..046c94c5
--- /dev/null
+++ b/Desktop/open-source/R/documentation/random_forest.html
@@ -0,0 +1,142 @@
+<!-- Rendered HTML version of documentation/random_forest.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/random_forest.md b/Desktop/open-source/R/documentation/random_forest.md
new file mode 100644
index 00000000..63714ea2
--- /dev/null
+++ b/Desktop/open-source/R/documentation/random_forest.md
@@ -0,0 +1,44 @@
+
+
+``` r
+library(randomForest)
+```
+
+```
+## Error in library(randomForest): there is no package called 'randomForest'
+```
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Fitting model
+fit <- randomForest(Species ~ ., x,ntree=500)
+```
+
+```
+## Error in randomForest(Species ~ ., x, ntree = 500): could not find function "randomForest"
+```
+
+``` r
+summary(fit)
+```
+
+```
+## Error in summary(fit): object 'fit' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(fit,x_test)
+```
+
+```
+## Error in predict(fit, x_test): object 'fit' not found
+```
+
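+A minimal, runnable sketch using the built-in `iris` data, assuming the
+`randomForest` package is installed (the chunks above failed because the package
+and the `x_train`/`y_train` objects were unavailable when this document was
+rendered):
+
+``` r
+library(randomForest)
+
+# Train/test split of iris
+set.seed(123)
+train_idx <- sample(seq_len(nrow(iris)), 100)
+train <- iris[train_idx, ]
+test <- iris[-train_idx, ]
+
+# Fit a random forest and predict the held-out species
+fit <- randomForest(Species ~ ., data = train, ntree = 500)
+predicted <- predict(fit, newdata = test)
+table(predicted, test$Species)
+```
+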
diff --git a/Desktop/open-source/R/documentation/svm.html b/Desktop/open-source/R/documentation/svm.html
new file mode 100644
index 00000000..f4a95615
--- /dev/null
+++ b/Desktop/open-source/R/documentation/svm.html
@@ -0,0 +1,142 @@
+<!-- Rendered HTML version of documentation/svm.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/svm.md b/Desktop/open-source/R/documentation/svm.md
new file mode 100644
index 00000000..cb984245
--- /dev/null
+++ b/Desktop/open-source/R/documentation/svm.md
@@ -0,0 +1,44 @@
+
+
+``` r
+library(e1071)
+```
+
+```
+## Error in library(e1071): there is no package called 'e1071'
+```
+
+``` r
+x <- cbind(x_train,y_train)
+```
+
+```
+## Error in cbind(x_train, y_train): object 'x_train' not found
+```
+
+``` r
+# Fitting model
+fit <-svm(y_train ~ ., data = x)
+```
+
+```
+## Error in svm(y_train ~ ., data = x): could not find function "svm"
+```
+
+``` r
+summary(fit)
+```
+
+```
+## Error in summary(fit): object 'fit' not found
+```
+
+``` r
+# Predict Output
+predicted= predict(fit,x_test)
+```
+
+```
+## Error in predict(fit, x_test): object 'fit' not found
+```
+
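+A minimal, runnable sketch using the built-in `iris` data, assuming the `e1071`
+package is installed (the chunks above failed because the package and the
+`x_train`/`y_train` objects were unavailable when this document was rendered):
+
+``` r
+library(e1071)
+
+# Train/test split of iris
+set.seed(123)
+train_idx <- sample(seq_len(nrow(iris)), 100)
+train <- iris[train_idx, ]
+test <- iris[-train_idx, ]
+
+# Fit a support vector machine and predict the held-out species
+fit <- svm(Species ~ ., data = train)
+predicted <- predict(fit, newdata = test)
+table(predicted, test$Species)
+```
+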
diff --git a/Desktop/open-source/R/documentation/xgboost.html b/Desktop/open-source/R/documentation/xgboost.html
new file mode 100644
index 00000000..c3407142
--- /dev/null
+++ b/Desktop/open-source/R/documentation/xgboost.html
@@ -0,0 +1,174 @@
+<!-- Rendered HTML version of documentation/xgboost.md (markup and duplicated code/output omitted) -->
diff --git a/Desktop/open-source/R/documentation/xgboost.md b/Desktop/open-source/R/documentation/xgboost.md
new file mode 100644
index 00000000..1e598862
--- /dev/null
+++ b/Desktop/open-source/R/documentation/xgboost.md
@@ -0,0 +1,108 @@
+
+
+``` r
+library(tidyverse)
+```
+
+```
+## Error in library(tidyverse): there is no package called 'tidyverse'
+```
+
+``` r
+library(xgboost)
+```
+
+```
+## Error in library(xgboost): there is no package called 'xgboost'
+```
+
+``` r
+ind<-sample(2,nrow(diamonds),replace = T,prob = c(0.7,0.3))
+```
+
+```
+## Error in nrow(diamonds): object 'diamonds' not found
+```
+
+``` r
+train.set<-diamonds[ind==1,]
+```
+
+```
+## Error: object 'diamonds' not found
+```
+
+``` r
+test.set<-diamonds[ind==2,]
+```
+
+```
+## Error: object 'diamonds' not found
+```
+
+``` r
+xgb.train<-bind_cols(select_if(train.set,is.numeric),model.matrix(~cut-1,train.set) %>% as.tibble(),model.matrix(~color-1,train.set) %>% as.tibble(),model.matrix(~clarity-1,train.set) %>% as.tibble())
+```
+
+```
+## Error in bind_cols(select_if(train.set, is.numeric), model.matrix(~cut - : could not find function "bind_cols"
+```
+
+``` r
+xgboost.train<-xgb.DMatrix(data = as.matrix(select(xgb.train,-price)),label=xgb.train$price)
+```
+
+```
+## Error in xgb.DMatrix(data = as.matrix(select(xgb.train, -price)), label = xgb.train$price): could not find function "xgb.DMatrix"
+```
+
+``` r
+xgb.test<-bind_cols(select_if(test.set,is.numeric),model.matrix(~cut-1,test.set) %>% as.tibble(),model.matrix(~color-1,test.set) %>% as.tibble(),model.matrix(~clarity-1,test.set) %>% as.tibble())
+```
+
+```
+## Error in bind_cols(select_if(test.set, is.numeric), model.matrix(~cut - : could not find function "bind_cols"
+```
+
+``` r
+xgboost.test<-xgb.DMatrix(data = select(xgb.test,-price) %>% as.matrix(),label=xgb.test$price)
+```
+
+```
+## Error in xgb.DMatrix(data = select(xgb.test, -price) %>% as.matrix(), : could not find function "xgb.DMatrix"
+```
+
+``` r
+param<-list(eval_metric='rmse',gamma=1,max_depth=6,nthread = 3)
+xg.model<-xgb.train(data = xgboost.train,params = param,watchlist = list(test=xgboost.test),nrounds = 500,early_stopping_rounds = 60,
+ print_every_n = 30)
+```
+
+```
+## Error in xgb.train(data = xgboost.train, params = param, watchlist = list(test = xgboost.test), : could not find function "xgb.train"
+```
+
+``` r
+xg.predict<-predict(xg.model,xgboost.test)
+```
+
+```
+## Error in predict(xg.model, xgboost.test): object 'xg.model' not found
+```
+
+``` r
+mse.xgb<-sqrt(mean((test.set$price-xg.predict)^2))
+```
+
+```
+## Error in mean((test.set$price - xg.predict)^2): object 'test.set' not found
+```
+
+``` r
+plot((test.set$price-xg.predict))
+```
+
+```
+## Error in plot((test.set$price - xg.predict)): object 'test.set' not found
+```
+
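+A minimal, runnable sketch on simulated numeric data, assuming the `xgboost`
+package is installed and reusing only the `xgb.DMatrix()`/`xgb.train()` calls
+already shown above (the chunks above failed because `tidyverse`, `xgboost`, and
+the `diamonds` data were unavailable when this document was rendered):
+
+``` r
+library(xgboost)
+
+# Simulated regression data, purely for illustration
+set.seed(1)
+x <- matrix(rnorm(500 * 5), ncol = 5)
+y <- drop(x %*% c(1, -2, 0.5, 0, 3)) + rnorm(500)
+
+train_idx <- sample(seq_len(nrow(x)), 350)
+dtrain <- xgb.DMatrix(data = x[train_idx, ], label = y[train_idx])
+dtest <- xgb.DMatrix(data = x[-train_idx, ], label = y[-train_idx])
+
+params <- list(eval_metric = "rmse", max_depth = 4, nthread = 2)
+xg.model <- xgb.train(data = dtrain, params = params,
+                      watchlist = list(test = dtest),
+                      nrounds = 50, early_stopping_rounds = 10,
+                      print_every_n = 10)
+
+xg.predict <- predict(xg.model, dtest)
+rmse <- sqrt(mean((y[-train_idx] - xg.predict)^2))
+```
+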
diff --git a/Desktop/open-source/R/dynamic_programming/0/0/1_knapsack_problem.r b/Desktop/open-source/R/dynamic_programming/0/0/1_knapsack_problem.r
new file mode 100644
index 00000000..4ff1cc3a
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/0/0/1_knapsack_problem.r
@@ -0,0 +1,194 @@
+# 0/1 Knapsack Problem (Dynamic Programming)
+#
+# The 0/1 Knapsack problem is one of the classic problems in dynamic programming.
+# Given a set of items, each with a weight and a value, determine the maximum total value
+# you can obtain by putting items in a knapsack with a fixed capacity. Each item can
+# either be included (1) or excluded (0) — hence the name "0/1".
+#
+# Time Complexity: O(n * W) where n = number of items, W = knapsack capacity
+# Space Complexity: O(n * W) for full DP table, O(W) for optimized version
+#
+# Applications:
+# - Budget allocation problems
+# - Resource optimization (CPU scheduling, project selection)
+# - Portfolio selection in finance
+# - Cargo loading and packing
+# - Subset optimization in AI planning
+
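+# DP recurrence used below, where dp[i, w] is the best value achievable with the
+# first i items and capacity w (both indices shifted by 1 in the R matrix):
+#   dp[i, w] = dp[i-1, w]                                            if weights[i] > w
+#   dp[i, w] = max(dp[i-1, w], values[i] + dp[i-1, w - weights[i]])  otherwise
+# with dp[0, w] = 0 for every w.
+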
+# Classic DP solution for 0/1 Knapsack
+knapsack_01 <- function(weights, values, capacity) {
+ #' Solve 0/1 Knapsack Problem using Dynamic Programming
+ #' @param weights: Numeric vector of item weights
+ #' @param values: Numeric vector of item values
+ #' @param capacity: Maximum weight capacity of the knapsack
+ #' @return: List containing max value, selected items, and DP table
+
+ n <- length(values)
+
+ # Handle edge case
+ if (n == 0 || capacity == 0) {
+ return(list(
+ max_value = 0,
+ selected_items = c(),
+ dp_table = matrix(0, nrow = n + 1, ncol = capacity + 1)
+ ))
+ }
+
+ # Create DP table: dp[i, w] = max value using first i items with capacity w
+ dp <- matrix(0, nrow = n + 1, ncol = capacity + 1)
+
+ # Fill DP table
+ for (i in 1:n) {
+ for (w in 0:capacity) {
+ # Don't include item i
+ dp[i + 1, w + 1] <- dp[i, w + 1]
+
+ # Include item i (if it fits)
+ if (weights[i] <= w) {
+ include_value <- values[i] + dp[i, w - weights[i] + 1]
+ dp[i + 1, w + 1] <- max(dp[i + 1, w + 1], include_value)
+ }
+ }
+ }
+
+ # Backtrack to find selected items
+ selected <- c()
+ i <- n
+ w <- capacity
+
+ while (i > 0 && w > 0) {
+ # If value came from including item i
+ if (dp[i + 1, w + 1] != dp[i, w + 1]) {
+ selected <- c(i, selected)
+ w <- w - weights[i]
+ }
+ i <- i - 1
+ }
+
+ return(list(
+ max_value = dp[n + 1, capacity + 1],
+ selected_items = selected,
+ dp_table = dp
+ ))
+}
+
+# Space-optimized version using only 1D array
+knapsack_01_optimized <- function(weights, values, capacity) {
+ #' Space optimized 0/1 Knapsack using 1D array
+ #' @return: Maximum total value
+
+ n <- length(values)
+
+ if (n == 0 || capacity == 0) {
+ return(0)
+ }
+
+ dp <- rep(0, capacity + 1)
+
+  # Process each item
+  for (i in 1:n) {
+    # Items heavier than the capacity can never be included; skipping them also
+    # keeps capacity:weights[i] counting downwards as intended
+    if (weights[i] > capacity) next
+    # Traverse from right to left to avoid overwriting values still needed
+    for (w in capacity:weights[i]) {
+      dp[w + 1] <- max(dp[w + 1], values[i] + dp[w - weights[i] + 1])
+    }
+  }
+
+ return(dp[capacity + 1])
+}
+
+# Helper function to print DP table
+print_knapsack_dp <- function(dp_table, weights, values, capacity) {
+ cat("DP Table for 0/1 Knapsack:\n")
+ cat("Weights:", paste(weights, collapse = ", "), "\n")
+ cat("Values :", paste(values, collapse = ", "), "\n")
+ cat("Capacity:", capacity, "\n\n")
+
+ # Print capacity headers
+ cat(" ")
+ cat(paste(sprintf("%3d", 0:capacity), collapse = " "))
+ cat("\n")
+ cat(paste(rep("-", 8 + 4 * (capacity + 1)), collapse = ""), "\n")
+
+ for (i in 1:nrow(dp_table)) {
+ cat(sprintf("Item %2d | ", i - 1))
+ cat(paste(sprintf("%3d", dp_table[i, ]), collapse = " "))
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== 0/1 Knapsack Problem (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+weights <- c(1, 3, 4, 5)
+values <- c(1, 4, 5, 7)
+capacity <- 7
+
+cat("Test 1: Basic Example\n")
+cat("Weights:", paste(weights, collapse = ", "), "\n")
+cat("Values :", paste(values, collapse = ", "), "\n")
+cat("Capacity:", capacity, "\n\n")
+
+result <- knapsack_01(weights, values, capacity)
+print_knapsack_dp(result$dp_table, weights, values, capacity)
+cat("Maximum Value:", result$max_value, "\n")
+cat("Selected Item Indices:", paste(result$selected_items, collapse = ", "), "\n")
+cat("Total Weight:", sum(weights[result$selected_items]), "\n")
+cat("Total Value:", sum(values[result$selected_items]), "\n\n")
+
+# Test 2: Space Optimized Example
+cat("Test 2: Space Optimized Version\n")
+max_val_opt <- knapsack_01_optimized(weights, values, capacity)
+cat("Maximum Value (Optimized):", max_val_opt, "\n")
+cat("Verification: Both methods match:", result$max_value == max_val_opt, "\n\n")
+
+# Test 3: Larger Dataset
+cat("Test 3: Larger Dataset\n")
+set.seed(42)
+weights <- sample(1:15, 10)
+values <- sample(10:100, 10)
+capacity <- 35
+
+cat("Weights:", paste(weights, collapse = ", "), "\n")
+cat("Values :", paste(values, collapse = ", "), "\n")
+cat("Capacity:", capacity, "\n\n")
+
+large_result <- knapsack_01(weights, values, capacity)
+cat("Maximum Value:", large_result$max_value, "\n")
+cat("Selected Items:", paste(large_result$selected_items, collapse = ", "), "\n")
+cat("Total Weight:", sum(weights[large_result$selected_items]), "\n\n")
+
+# Test 4: Edge Cases
+cat("Test 4: Edge Cases\n")
+cat("Empty items:", knapsack_01(c(), c(), 10)$max_value, "\n")
+cat("Zero capacity:", knapsack_01(weights, values, 0)$max_value, "\n")
+cat("Single item fits:", knapsack_01(c(5), c(10), 10)$max_value, "\n")
+cat("Single item doesn't fit:", knapsack_01(c(10), c(10), 5)$max_value, "\n\n")
+
+# Test 5: Performance Check
+cat("Test 5: Performance Comparison (n=100)\n")
+n <- 100
+weights <- sample(1:15, n, replace = TRUE)
+values <- sample(10:100, n, replace = TRUE)
+capacity <- 200
+
+start_time <- Sys.time()
+res_std <- knapsack_01_optimized(weights, values, capacity)
+std_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Optimized DP result:", res_std, "\n")
+cat("Time taken:", sprintf("%.4f sec", std_time), "\n")
+
+# Verify correctness
+cat("\nVerifying correctness with full DP:\n")
+start_time <- Sys.time()
+res_full <- knapsack_01(weights, values, capacity)
+full_time <- as.numeric(Sys.time() - start_time, units = "secs")
+cat("Full DP result:", res_full$max_value, "\n")
+cat("Time taken:", sprintf("%.4f sec", full_time), "\n")
+cat("Results match:", res_std == res_full$max_value, "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/dynamic_programming/0/1_knapsack_problem.r b/Desktop/open-source/R/dynamic_programming/0/1_knapsack_problem.r
new file mode 100644
index 00000000..c64dd730
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/0/1_knapsack_problem.r
@@ -0,0 +1,155 @@
+# 0/1 Knapsack Problem (Dynamic Programming)
+#
+# The 0/1 Knapsack problem is one of the classic problems in dynamic programming.
+# Given a set of items, each with a weight and a value, determine the maximum total value
+# you can obtain by putting items in a knapsack with a fixed capacity. Each item can
+# either be included (1) or excluded (0) — hence the name "0/1".
+#
+# Time Complexity: O(n * W) where n = number of items, W = knapsack capacity
+# Space Complexity: O(n * W) for full DP table, O(W) for optimized version
+#
+# Applications:
+# - Budget allocation problems
+# - Resource optimization (CPU scheduling, project selection)
+# - Portfolio selection in finance
+# - Cargo loading and packing
+# - Subset optimization in AI planning
+
+# Classic DP solution for 0/1 Knapsack
+knapsack_01 <- function(weights, values, capacity) {
+ #' Solve 0/1 Knapsack Problem using Dynamic Programming
+ #' @param weights: Numeric vector of item weights
+ #' @param values: Numeric vector of item values
+ #' @param capacity: Maximum weight capacity of the knapsack
+ #' @return: List containing max value, selected items, and DP table
+
+  n <- length(values)
+
+  # Guard against empty input or zero capacity (Test 4 below exercises both)
+  if (n == 0 || capacity == 0) {
+    return(list(
+      max_value = 0,
+      selected_items = c(),
+      dp_table = matrix(0, nrow = n + 1, ncol = capacity + 1)
+    ))
+  }
+
+  dp <- matrix(0, nrow = n + 1, ncol = capacity + 1)
+
+ # Fill DP table
+ for (i in 2:(n + 1)) {
+ for (w in 0:capacity) {
+ if (weights[i - 1] <= w) {
+ include <- values[i - 1] + dp[i - 1, w - weights[i - 1] + 1]
+ exclude <- dp[i - 1, w + 1]
+ dp[i, w + 1] <- max(include, exclude)
+ } else {
+ dp[i, w + 1] <- dp[i - 1, w + 1]
+ }
+ }
+ }
+
+ # Backtrack to find selected items
+ res_value <- dp[n + 1, capacity + 1]
+ selected <- c()
+ w <- capacity
+
+ for (i in n:1) {
+ if (res_value <= 0) break
+ if (res_value == dp[i, w + 1]) next
+
+ # Item i is included
+ selected <- c(i, selected)
+ res_value <- res_value - values[i]
+ w <- w - weights[i]
+ }
+
+ return(list(
+ max_value = dp[n + 1, capacity + 1],
+ selected_items = selected,
+ dp_table = dp
+ ))
+}
+
+# Space-optimized version using only 1D array
+knapsack_01_optimized <- function(weights, values, capacity) {
+ #' Space optimized 0/1 Knapsack using 1D array
+ #' @return: Maximum total value
+
+ n <- length(values)
+ dp <- rep(0, capacity + 1)
+
+  for (i in 1:n) {
+    # Skip items heavier than the capacity; otherwise capacity:weights[i]
+    # would count upwards and index past the end of dp
+    if (weights[i] > capacity) next
+    # Traverse from right to left so each item is counted at most once
+    for (w in capacity:weights[i]) {
+      dp[w + 1] <- max(dp[w + 1], values[i] + dp[w - weights[i] + 1])
+    }
+  }
+
+ return(dp[capacity + 1])
+}
+
+# Helper function to print DP table
+print_knapsack_dp <- function(dp_table, weights, values, capacity) {
+ cat("DP Table for 0/1 Knapsack:\n")
+ cat("Weights:", paste(weights, collapse = ", "), "\n")
+ cat("Values :", paste(values, collapse = ", "), "\n")
+ cat("Capacity:", capacity, "\n\n")
+
+ for (i in 1:nrow(dp_table)) {
+ cat(sprintf("Item %2d | ", i - 1))
+ cat(paste(sprintf("%3d", dp_table[i, ]), collapse = " "))
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== 0/1 Knapsack Problem (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+weights <- c(1, 3, 4, 5)
+values <- c(1, 4, 5, 7)
+capacity <- 7
+
+cat("Test 1: Basic Example\n")
+cat("Weights:", paste(weights, collapse = ", "), "\n")
+cat("Values :", paste(values, collapse = ", "), "\n")
+cat("Capacity:", capacity, "\n\n")
+
+result <- knapsack_01(weights, values, capacity)
+print_knapsack_dp(result$dp_table, weights, values, capacity)
+cat("Maximum Value:", result$max_value, "\n")
+cat("Selected Item Indices:", paste(result$selected_items, collapse = ", "), "\n\n")
+
+# Test 2: Space Optimized Example
+cat("Test 2: Space Optimized Version\n")
+max_val_opt <- knapsack_01_optimized(weights, values, capacity)
+cat("Maximum Value (Optimized):", max_val_opt, "\n\n")
+
+# Test 3: Larger Dataset
+cat("Test 3: Larger Dataset\n")
+set.seed(42)
+weights <- sample(1:15, 10)
+values <- sample(10:100, 10)
+capacity <- 35
+
+cat("Weights:", paste(weights, collapse = ", "), "\n")
+cat("Values :", paste(values, collapse = ", "), "\n")
+cat("Capacity:", capacity, "\n\n")
+
+large_result <- knapsack_01(weights, values, capacity)
+cat("Maximum Value:", large_result$max_value, "\n")
+cat("Selected Items:", paste(large_result$selected_items, collapse = ", "), "\n\n")
+
+# Test 4: Edge Cases
+cat("Test 4: Edge Cases\n")
+cat("Empty items:", knapsack_01(c(), c(), 10)$max_value, "\n")
+cat("Zero capacity:", knapsack_01(weights, values, 0)$max_value, "\n")
+cat("Single item fits:", knapsack_01(c(5), c(10), 10)$max_value, "\n")
+cat("Single item doesn't fit:", knapsack_01(c(10), c(10), 5)$max_value, "\n\n")
+
+# Test 5: Performance Check
+cat("Test 5: Performance Comparison (n=100)\n")
+n <- 100
+weights <- sample(1:15, n, replace = TRUE)
+values <- sample(10:100, n, replace = TRUE)
+capacity <- 200
+
+start_time <- Sys.time()
+res_std <- knapsack_01_optimized(weights, values, capacity)
+std_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Optimized DP result:", res_std, "\n")
+cat("Time taken:", sprintf("%.4f sec", std_time), "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/dynamic_programming/coin_change.r b/Desktop/open-source/R/dynamic_programming/coin_change.r
new file mode 100644
index 00000000..444246bd
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/coin_change.r
@@ -0,0 +1,76 @@
+# Coin Change Problem
+#
+# The Coin Change problem finds the minimum number of coins needed to make a certain amount
+# using a given set of coin denominations.
+#
+# Time Complexity: O(amount * n) where n = number of coin denominations
+# Space Complexity: O(amount)
+#
+# Applications:
+# - Currency and cash management
+# - Making change in vending machines or payment systems
+# - Dynamic resource allocation
+# - Minimum cost problems in algorithms
+
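+# DP recurrence used below, where dp[a] is the fewest coins summing to amount a:
+#   dp[0] = 0
+#   dp[a] = 1 + min(dp[a - coin]) over all coins with coin <= a
+# (dp[a] stays Inf for amounts that cannot be formed).
+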
+# Function to compute minimum coins required
+coin_change <- function(coins, amount) {
+ #' Compute minimum number of coins needed to make the given amount
+ #' @param coins: Numeric vector of coin denominations
+ #' @param amount: Total amount to make
+ #' @return: Minimum number of coins required, or -1 if not possible
+
+ # Initialize DP array
+ dp <- rep(Inf, amount + 1)
+ dp[0 + 1] <- 0 # zero coins needed for amount 0
+
+  for (i in seq_len(amount)) { # seq_len() keeps the loop empty when amount is 0
+ for (coin in coins) {
+ if (coin <= i) {
+ dp[i + 1] <- min(dp[i + 1], 1 + dp[i - coin + 1])
+ }
+ }
+ }
+
+ if (dp[amount + 1] == Inf) {
+ return(-1)
+ } else {
+ return(dp[amount + 1])
+ }
+}
+
+# Function to print the DP table (for educational purposes)
+print_coin_change_dp <- function(dp, amount) {
+ cat("DP Table for Coin Change:\n")
+ for (i in 0:amount) {
+ cat(sprintf("Amount %2d: %s\n", i, dp[i + 1]))
+ }
+ cat("\n")
+}
+
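+# A minimal sketch (separate from the functions above) of how the same DP can
+# also report which coins make up one optimal solution; the helper name
+# coin_change_with_coins is illustrative, not an established API.
+coin_change_with_coins <- function(coins, amount) {
+  dp <- rep(Inf, amount + 1)
+  dp[1] <- 0
+  parent <- rep(NA_real_, amount + 1) # coin chosen to reach each amount
+
+  for (i in seq_len(amount)) {
+    for (coin in coins) {
+      if (coin <= i && dp[i - coin + 1] + 1 < dp[i + 1]) {
+        dp[i + 1] <- dp[i - coin + 1] + 1
+        parent[i + 1] <- coin
+      }
+    }
+  }
+
+  if (is.infinite(dp[amount + 1])) {
+    return(list(count = -1, coins = c()))
+  }
+
+  # Walk back through the chosen coins
+  used <- c()
+  i <- amount
+  while (i > 0) {
+    used <- c(used, parent[i + 1])
+    i <- i - parent[i + 1]
+  }
+  list(count = dp[amount + 1], coins = used)
+}
+# Example: coin_change_with_coins(c(1, 2, 5), 11) reports 3 coins (e.g. 1 + 5 + 5)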
+
+# Example Usage & Testing
+cat("=== Coin Change Problem ===\n\n")
+
+# Test 1: Basic Example
+coins <- c(1, 2, 5)
+amount <- 11
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+cat("Amount:", amount, "\n")
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
+
+# Test 2: No solution case
+coins <- c(2, 4)
+amount <- 7
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+cat("Amount:", amount, "\n")
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
+
+# Test 3: Larger dataset
+coins <- c(1, 3, 4, 5)
+amount <- 7
+cat("Coins:", paste(coins, collapse = ", "), "\n")
+cat("Amount:", amount, "\n")
+min_coins <- coin_change(coins, amount)
+cat("Minimum Coins Needed:", min_coins, "\n\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/dynamic_programming/longest_common_subsequence.r b/Desktop/open-source/R/dynamic_programming/longest_common_subsequence.r
new file mode 100644
index 00000000..778282d9
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/longest_common_subsequence.r
@@ -0,0 +1,391 @@
+# Longest Common Subsequence (LCS) Algorithm
+#
+# The LCS problem finds the longest subsequence common to two sequences.
+# A subsequence is a sequence derived from another sequence by deleting some
+# or no elements without changing the order of the remaining elements.
+#
+# Time Complexity: O(m * n) where m, n are lengths of the sequences
+# Space Complexity: O(m * n) for the DP table, O(min(m, n)) optimized version
+#
+# Applications:
+# - DNA sequence analysis in bioinformatics
+# - File difference utilities (diff command)
+# - Version control systems (git diff)
+# - Plagiarism detection
+# - Data compression algorithms
+# - Edit distance calculations
+
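+# DP recurrence used throughout this file, where dp[i, j] is the LCS length of
+# the first i characters of str1 and the first j characters of str2:
+#   dp[i, j] = dp[i-1, j-1] + 1             if str1[i] == str2[j]
+#   dp[i, j] = max(dp[i-1, j], dp[i, j-1])  otherwise
+# with dp[0, j] = dp[i, 0] = 0.
+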
+# Basic LCS algorithm with full DP table
+lcs_length <- function(str1, str2) {
+ #' Find the length of longest common subsequence
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: Length of LCS
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ return(dp[m + 1, n + 1])
+}
+
+# LCS algorithm that returns the actual subsequence
+lcs_string <- function(str1, str2) {
+ #' Find the longest common subsequence string
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: List containing LCS string and its length
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Backtrack to find the actual LCS string
+ lcs <- ""
+ i <- m + 1
+ j <- n + 1
+
+ while (i > 1 && j > 1) {
+ if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ lcs <- paste0(substr(str1, i - 1, i - 1), lcs)
+ i <- i - 1
+ j <- j - 1
+ } else if (dp[i - 1, j] > dp[i, j - 1]) {
+ i <- i - 1
+ } else {
+ j <- j - 1
+ }
+ }
+
+ return(list(
+ lcs = lcs,
+ length = dp[m + 1, n + 1],
+ dp_table = dp
+ ))
+}
+
+# Space-optimized LCS (only returns length)
+lcs_length_optimized <- function(str1, str2) {
+ #' Space-optimized LCS length calculation
+ #' Uses only O(min(m, n)) space instead of O(m * n)
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: Length of LCS
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Make str1 the shorter string for space optimization
+ if (m > n) {
+ temp <- str1
+ str1 <- str2
+ str2 <- temp
+ temp <- m
+ m <- n
+ n <- temp
+ }
+
+ # Use two arrays instead of full matrix
+ prev <- rep(0, m + 1)
+ curr <- rep(0, m + 1)
+
+ for (j in 1:(n + 1)) {
+ for (i in 1:(m + 1)) {
+ if (i == 1 || j == 1) {
+ curr[i] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ curr[i] <- prev[i - 1] + 1
+ } else {
+ curr[i] <- max(prev[i], curr[i - 1])
+ }
+ }
+ # Swap arrays
+ temp <- prev
+ prev <- curr
+ curr <- temp
+ }
+
+ return(prev[m + 1])
+}
+
+# Find all possible LCS strings (there can be multiple)
+find_all_lcs <- function(str1, str2) {
+ #' Find all possible longest common subsequences
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @return: List of all LCS strings
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Recursive function to find all LCS
+ find_all_lcs_recursive <- function(i, j) {
+ if (i == 1 || j == 1) {
+ return("")
+ }
+
+ if (substr(str1, i - 1, i - 1) == substr(str2, j - 1, j - 1)) {
+ char <- substr(str1, i - 1, i - 1)
+ prev_lcs <- find_all_lcs_recursive(i - 1, j - 1)
+ return(paste0(prev_lcs, char))
+ } else {
+ results <- c()
+
+ if (dp[i - 1, j] == dp[i, j]) {
+ results <- c(results, find_all_lcs_recursive(i - 1, j))
+ }
+
+ if (dp[i, j - 1] == dp[i, j]) {
+ results <- c(results, find_all_lcs_recursive(i, j - 1))
+ }
+
+ return(unique(results))
+ }
+ }
+
+ all_lcs <- find_all_lcs_recursive(m + 1, n + 1)
+ return(unique(all_lcs))
+}
+
+# LCS for arrays/vectors instead of strings
+lcs_array <- function(arr1, arr2) {
+ #' Find LCS of two arrays/vectors
+ #' @param arr1: First array
+ #' @param arr2: Second array
+ #' @return: List with LCS array and length
+
+ m <- length(arr1)
+ n <- length(arr2)
+
+ # Create DP table
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ # Fill the DP table
+ for (i in 1:(m + 1)) {
+ for (j in 1:(n + 1)) {
+ if (i == 1 || j == 1) {
+ dp[i, j] <- 0
+ } else if (arr1[i - 1] == arr2[j - 1]) {
+ dp[i, j] <- dp[i - 1, j - 1] + 1
+ } else {
+ dp[i, j] <- max(dp[i - 1, j], dp[i, j - 1])
+ }
+ }
+ }
+
+ # Backtrack to find the actual LCS
+ lcs <- c()
+ i <- m + 1
+ j <- n + 1
+
+ while (i > 1 && j > 1) {
+ if (arr1[i - 1] == arr2[j - 1]) {
+ lcs <- c(arr1[i - 1], lcs)
+ i <- i - 1
+ j <- j - 1
+ } else if (dp[i - 1, j] > dp[i, j - 1]) {
+ i <- i - 1
+ } else {
+ j <- j - 1
+ }
+ }
+
+ return(list(
+ lcs = lcs,
+ length = dp[m + 1, n + 1]
+ ))
+}
+
+# Function to print the DP table (for educational purposes)
+print_dp_table <- function(str1, str2, dp_table) {
+ #' Print the DP table in a readable format
+ #' @param str1: First string
+ #' @param str2: Second string
+ #' @param dp_table: DP table from lcs_string function
+
+ m <- nchar(str1)
+ n <- nchar(str2)
+
+ cat("DP Table for LCS calculation:\n")
+ cat("String 1:", str1, "\n")
+ cat("String 2:", str2, "\n\n")
+
+ # Print column headers
+ cat(" ε ")
+ for (j in 1:n) {
+ cat(sprintf("%2s ", substr(str2, j, j)))
+ }
+ cat("\n")
+
+ # Print table rows
+ for (i in 1:(m + 1)) {
+ if (i == 1) {
+ cat(" ε ")
+ } else {
+ cat(sprintf("%2s ", substr(str1, i - 1, i - 1)))
+ }
+
+ for (j in 1:(n + 1)) {
+ cat(sprintf("%2d ", dp_table[i, j]))
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# Example usage and testing
+cat("=== Longest Common Subsequence (LCS) Algorithm ===\n\n")
+
+# Test 1: Basic LCS example
+cat("1. Basic LCS Example\n")
+str1 <- "ABCDGH"
+str2 <- "AEDFHR"
+
+cat("String 1:", str1, "\n")
+cat("String 2:", str2, "\n")
+
+result1 <- lcs_string(str1, str2)
+cat("LCS:", result1$lcs, "\n")
+cat("Length:", result1$length, "\n")
+
+print_dp_table(str1, str2, result1$dp_table)
+
+# Test 2: DNA sequence analysis
+cat("2. DNA Sequence Analysis\n")
+dna1 <- "ATCGATCGATCG"
+dna2 <- "ATGCGATGCATG"
+
+cat("DNA Sequence 1:", dna1, "\n")
+cat("DNA Sequence 2:", dna2, "\n")
+
+dna_result <- lcs_string(dna1, dna2)
+cat("Common subsequence:", dna_result$lcs, "\n")
+cat("Length:", dna_result$length, "\n")
+cat("Similarity:", sprintf("%.2f%%", dna_result$length / max(nchar(dna1), nchar(dna2)) * 100), "\n\n")
+
+# Test 3: Finding all possible LCS
+cat("3. Multiple LCS Solutions\n")
+str3 <- "ABCDEF"
+str4 <- "ACBDEF"
+
+cat("String 1:", str3, "\n")
+cat("String 2:", str4, "\n")
+
+all_lcs <- find_all_lcs(str3, str4)
+cat("All possible LCS:\n")
+for (i in seq_along(all_lcs)) {
+ cat(" ", i, ":", all_lcs[i], "\n")
+}
+cat("\n")
+
+# Test 4: Array LCS example
+cat("4. Array LCS Example\n")
+arr1 <- c(1, 2, 3, 4, 5)
+arr2 <- c(2, 3, 5, 7, 8)
+
+cat("Array 1:", paste(arr1, collapse = ", "), "\n")
+cat("Array 2:", paste(arr2, collapse = ", "), "\n")
+
+arr_result <- lcs_array(arr1, arr2)
+cat("LCS Array:", paste(arr_result$lcs, collapse = ", "), "\n")
+cat("Length:", arr_result$length, "\n\n")
+
+# Test 5: Performance comparison
+cat("5. Performance Comparison\n")
+long_str1 <- paste(sample(LETTERS[1:5], 100, replace = TRUE), collapse = "")
+long_str2 <- paste(sample(LETTERS[1:5], 100, replace = TRUE), collapse = "")
+
+cat("Testing with strings of length 100...\n")
+
+# Standard algorithm
+start_time <- Sys.time()
+standard_result <- lcs_length(long_str1, long_str2)
+standard_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+# Optimized algorithm
+start_time <- Sys.time()
+optimized_result <- lcs_length_optimized(long_str1, long_str2)
+optimized_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Standard algorithm: LCS length =", standard_result,
+ "Time:", sprintf("%.6f", standard_time), "seconds\n")
+cat("Optimized algorithm: LCS length =", optimized_result,
+ "Time:", sprintf("%.6f", optimized_time), "seconds\n")
+cat("Results match:", standard_result == optimized_result, "\n\n")
+
+# Test 6: Edge cases
+cat("6. Edge Cases\n")
+cat("Empty strings:", lcs_length("", "ABC"), "\n")
+cat("One empty string:", lcs_length("ABC", ""), "\n")
+cat("Identical strings:", lcs_length("HELLO", "HELLO"), "\n")
+cat("No common characters:", lcs_length("ABC", "DEF"), "\n")
+cat("Single character:", lcs_length("A", "A"), "\n")
+cat("Single vs multiple:", lcs_length("A", "ABCDEF"), "\n\n")
+
+# Test 7: Real-world example - File diff simulation
+cat("7. File Diff Simulation\n")
+file1_lines <- c("Hello World", "This is line 2", "Line 3 here", "Final line")
+file2_lines <- c("Hello World", "This is modified line 2", "Line 3 here", "New line", "Final line")
+
+cat("File 1 lines:\n")
+for (i in seq_along(file1_lines)) {
+ cat(" ", i, ":", file1_lines[i], "\n")
+}
+
+cat("File 2 lines:\n")
+for (i in seq_along(file2_lines)) {
+ cat(" ", i, ":", file2_lines[i], "\n")
+}
+
+file_lcs <- lcs_array(file1_lines, file2_lines)
+cat("Common lines (unchanged):\n")
+for (i in seq_along(file_lcs$lcs)) {
+ cat(" ", file_lcs$lcs[i], "\n")
+}
+cat("Similarity:", sprintf("%.1f%%", file_lcs$length / max(length(file1_lines), length(file2_lines)) * 100), "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/dynamic_programming/longest_increasing_subsequence.r b/Desktop/open-source/R/dynamic_programming/longest_increasing_subsequence.r
new file mode 100644
index 00000000..242f94bb
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/longest_increasing_subsequence.r
@@ -0,0 +1,249 @@
+# Longest Increasing Subsequence (Dynamic Programming)
+#
+# The Longest Increasing Subsequence (LIS) problem is a classic dynamic programming problem.
+# Given an array of integers, find the length of the longest subsequence that is strictly
+# increasing. A subsequence is derived from the array by deleting some or no elements
+# without changing the order of the remaining elements.
+#
+# Time Complexity: O(n²) for basic DP, O(n log n) for optimized binary search version
+# Space Complexity: O(n) for both approaches
+#
+# Applications:
+# - Bioinformatics (DNA sequence analysis)
+# - Stock market analysis (longest upward trend)
+# - Scheduling problems
+# - Game theory (optimal play sequences)
+# - Data compression and pattern recognition
+
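+# DP recurrence used by the basic solution below, where dp[i] is the length of
+# the longest increasing subsequence ending at index i:
+#   dp[i] = 1 + max(dp[j]) over all j < i with nums[j] < nums[i]  (or 1 if no such j)
+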
+# Basic DP solution for Longest Increasing Subsequence
+longest_increasing_subsequence <- function(nums) {
+ #' Find the length of the longest increasing subsequence using Dynamic Programming
+ #' @param nums: Numeric vector of integers
+ #' @return: List containing max length, DP array, and one possible LIS
+
+ n <- length(nums)
+
+ # Handle edge cases
+ if (n == 0) {
+ return(list(
+ max_length = 0,
+ dp_array = c(),
+ lis_sequence = c(),
+ dp_table = c()
+ ))
+ }
+
+ if (n == 1) {
+ return(list(
+ max_length = 1,
+ dp_array = c(1),
+ lis_sequence = nums,
+ dp_table = c(1)
+ ))
+ }
+
+ # Initialize DP array: dp[i] = length of LIS ending at index i
+ dp <- rep(1, n)
+
+ # Fill DP array
+ for (i in 2:n) {
+ for (j in 1:(i - 1)) {
+ if (nums[j] < nums[i]) {
+ dp[i] <- max(dp[i], dp[j] + 1)
+ }
+ }
+ }
+
+ # Find maximum length
+ max_length <- max(dp)
+
+ # Backtrack to find one possible LIS
+ lis_sequence <- c()
+ current_length <- max_length
+
+ for (i in n:1) {
+ if (dp[i] == current_length) {
+ lis_sequence <- c(nums[i], lis_sequence)
+ current_length <- current_length - 1
+ if (current_length == 0) break
+ }
+ }
+
+ return(list(
+ max_length = max_length,
+ dp_array = dp,
+ lis_sequence = lis_sequence,
+ dp_table = dp
+ ))
+}
+
+# Optimized O(n log n) solution using binary search
+longest_increasing_subsequence_optimized <- function(nums) {
+ #' Find the length of the longest increasing subsequence using binary search
+ #' @param nums: Numeric vector of integers
+ #' @return: Length of the longest increasing subsequence
+
+ n <- length(nums)
+
+ if (n == 0) return(0)
+ if (n == 1) return(1)
+
+ # tails[i] stores the smallest tail of all increasing subsequences of length i+1
+ tails <- c()
+
+ for (num in nums) {
+ # Binary search for the position to replace or extend
+ pos <- binary_search_insert_position(tails, num)
+
+ if (pos > length(tails)) {
+ # Extend the sequence
+ tails <- c(tails, num)
+ } else {
+ # Replace the element at position pos
+ tails[pos] <- num
+ }
+ }
+
+ return(length(tails))
+}
+
+# Helper function for binary search
+binary_search_insert_position <- function(arr, target) {
+ #' Binary search to find the position where target should be inserted
+ #' @param arr: Sorted numeric vector
+ #' @param target: Value to insert
+ #' @return: Position (1-indexed) where target should be inserted
+
+ if (length(arr) == 0) return(1)
+
+ left <- 1
+ right <- length(arr)
+
+ while (left <= right) {
+ mid <- left + (right - left) %/% 2
+
+ if (arr[mid] < target) {
+ left <- mid + 1
+ } else {
+ right <- mid - 1
+ }
+ }
+
+ return(left)
+}
+
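+# A minimal sketch (separate from the functions above and not used by the tests
+# below) of how the O(n log n) approach can also recover one LIS by tracking
+# indices and predecessors; the name lis_sequence_optimized is illustrative.
+lis_sequence_optimized <- function(nums) {
+  n <- length(nums)
+  if (n == 0) return(c())
+
+  tails_idx <- c()                 # indices into nums; nums[tails_idx] mirrors 'tails'
+  prev_idx <- rep(NA_integer_, n)  # predecessor of each element in its best chain
+
+  for (k in 1:n) {
+    pos <- binary_search_insert_position(nums[tails_idx], nums[k])
+    if (pos > 1) prev_idx[k] <- tails_idx[pos - 1]
+    if (pos > length(tails_idx)) {
+      tails_idx <- c(tails_idx, k)
+    } else {
+      tails_idx[pos] <- k
+    }
+  }
+
+  # Walk back from the element that ends the longest chain
+  lis <- c()
+  k <- tails_idx[length(tails_idx)]
+  while (!is.na(k)) {
+    lis <- c(nums[k], lis)
+    k <- prev_idx[k]
+  }
+  return(lis)
+}
+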
+# Function to find all possible LIS sequences (simplified version)
+find_all_lis <- function(nums) {
+  #' Find longest increasing subsequences (simplified: returns one representative LIS)
+  #' @param nums: Numeric vector of integers
+  #' @return: List containing one representative LIS sequence
+
+ n <- length(nums)
+ if (n == 0) return(list())
+
+ # Calculate DP array
+ dp <- rep(1, n)
+ for (i in 2:n) {
+ for (j in 1:(i - 1)) {
+ if (nums[j] < nums[i]) {
+ dp[i] <- max(dp[i], dp[j] + 1)
+ }
+ }
+ }
+
+ max_length <- max(dp)
+
+ # For simplicity, return just one LIS (same as the main function)
+ # Finding all possible LIS is complex and not essential for the algorithm demonstration
+ result <- longest_increasing_subsequence(nums)
+ return(list(result$lis_sequence))
+}
+
+# Helper function to print DP table
+print_lis_dp <- function(dp_array, nums) {
+ cat("DP Array for Longest Increasing Subsequence:\n")
+ cat("Input Array:", paste(nums, collapse = ", "), "\n")
+ cat("DP Array :", paste(dp_array, collapse = ", "), "\n")
+ cat("Max Length :", max(dp_array), "\n\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== Longest Increasing Subsequence (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+nums1 <- c(10, 9, 2, 5, 3, 7, 101, 18)
+cat("Test 1: Basic Example\n")
+cat("Input Array:", paste(nums1, collapse = ", "), "\n\n")
+
+result1 <- longest_increasing_subsequence(nums1)
+print_lis_dp(result1$dp_array, nums1)
+cat("Maximum Length:", result1$max_length, "\n")
+cat("One LIS Sequence:", paste(result1$lis_sequence, collapse = ", "), "\n\n")
+
+# Test 2: Optimized Version
+cat("Test 2: Optimized O(n log n) Version\n")
+max_len_opt <- longest_increasing_subsequence_optimized(nums1)
+cat("Maximum Length (Optimized):", max_len_opt, "\n")
+cat("Verification: Both methods match:", result1$max_length == max_len_opt, "\n\n")
+
+# Test 3: LIS retrieval via find_all_lis (simplified: one representative)
+cat("Test 3: LIS Sequences (simplified - one representative)\n")
+all_lis <- find_all_lis(nums1)
+cat("Number of LIS sequences returned:", length(all_lis), "\n")
+for (i in seq_along(all_lis)) {
+ cat("LIS", i, ":", paste(all_lis[[i]], collapse = ", "), "\n")
+}
+cat("\n")
+
+# Test 4: Edge Cases
+cat("Test 4: Edge Cases\n")
+cat("Empty array:", longest_increasing_subsequence(c())$max_length, "\n")
+cat("Single element:", longest_increasing_subsequence(c(5))$max_length, "\n")
+cat("All same elements:", longest_increasing_subsequence(c(3, 3, 3, 3))$max_length, "\n")
+cat("Strictly decreasing:", longest_increasing_subsequence(c(5, 4, 3, 2, 1))$max_length, "\n")
+cat("Strictly increasing:", longest_increasing_subsequence(c(1, 2, 3, 4, 5))$max_length, "\n\n")
+
+# Test 5: Larger Dataset
+cat("Test 5: Larger Dataset (n=20)\n")
+set.seed(42)
+nums_large <- sample(1:100, 20)
+cat("Input Array:", paste(nums_large, collapse = ", "), "\n\n")
+
+result_large <- longest_increasing_subsequence(nums_large)
+cat("Maximum Length:", result_large$max_length, "\n")
+cat("One LIS Sequence:", paste(result_large$lis_sequence, collapse = ", "), "\n\n")
+
+# Test 6: Performance Comparison
+cat("Test 6: Performance Comparison (n=1000)\n")
+n <- 1000
+nums_perf <- sample(1:1000, n)
+
+start_time <- Sys.time()
+res_opt <- longest_increasing_subsequence_optimized(nums_perf)
+opt_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Optimized O(n log n) result:", res_opt, "\n")
+cat("Time taken:", sprintf("%.4f sec", opt_time), "\n")
+
+# Verify correctness with basic DP (smaller sample for time comparison)
+nums_small <- nums_perf[1:100]
+start_time <- Sys.time()
+res_basic <- longest_increasing_subsequence(nums_small)
+basic_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Basic O(n²) result (n=100):", res_basic$max_length, "\n")
+cat("Time taken:", sprintf("%.4f sec", basic_time), "\n")
+
+# Test 7: Real-world Example - Stock Prices
+cat("Test 7: Real-world Example - Stock Price Trend\n")
+stock_prices <- c(100, 102, 98, 105, 103, 107, 110, 108, 112, 115, 113, 118, 120, 117, 125)
+cat("Stock Prices:", paste(stock_prices, collapse = ", "), "\n")
+
+stock_result <- longest_increasing_subsequence(stock_prices)
+cat("Longest upward trend length:", stock_result$max_length, "\n")
+cat("Longest upward trend:", paste(stock_result$lis_sequence, collapse = ", "), "\n")
+cat("Percentage increase:",
+ sprintf("%.2f%%", (stock_result$lis_sequence[length(stock_result$lis_sequence)] /
+ stock_result$lis_sequence[1] - 1) * 100), "\n")
diff --git a/Desktop/open-source/R/dynamic_programming/matrix_chain_multiplication.r b/Desktop/open-source/R/dynamic_programming/matrix_chain_multiplication.r
new file mode 100644
index 00000000..54c5ab0b
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/matrix_chain_multiplication.r
@@ -0,0 +1,481 @@
+# Matrix Chain Multiplication Problem
+#
+# The Matrix Chain Multiplication problem finds the most efficient way to multiply
+# a given sequence of matrices. The goal is to minimize the number of scalar
+# multiplications required by determining the optimal parenthesization.
+#
+# Time Complexity: O(n³) where n = number of matrices
+# Space Complexity: O(n²) for DP table
+#
+# Applications:
+# - Computer graphics and 3D transformations
+# - Scientific computing and numerical analysis
+# - Machine learning and neural networks
+# - Signal processing and image processing
+# - Optimization in linear algebra operations
+
+# Basic DP solution for Matrix Chain Multiplication
+matrix_chain_multiplication <- function(dimensions) {
+ #' Find the minimum number of scalar multiplications needed to multiply matrices
+ #' @param dimensions: Vector of matrix dimensions [d0, d1, d2, ..., dn] where
+ #' matrix i has dimensions dimensions[i] x dimensions[i+1]
+ #' @return: List containing minimum cost, DP table, and optimal parenthesization
+
+ n <- length(dimensions) - 1 # Number of matrices
+
+ # Handle edge cases
+ if (n <= 0) {
+ return(list(
+ min_cost = 0,
+ dp_table = matrix(0, nrow = 1, ncol = 1),
+ optimal_parentheses = "",
+ split_table = matrix(0, nrow = 1, ncol = 1)
+ ))
+ }
+
+ if (n == 1) {
+ return(list(
+ min_cost = 0,
+ dp_table = matrix(0, nrow = 1, ncol = 1),
+ optimal_parentheses = "A1",
+ split_table = matrix(0, nrow = 1, ncol = 1)
+ ))
+ }
+
+ # Create DP table: dp[i, j] = minimum cost to multiply matrices from i to j
+ dp <- matrix(0, nrow = n, ncol = n)
+ split_table <- matrix(0, nrow = n, ncol = n) # To store optimal split points
+
+ # Fill DP table using bottom-up approach
+  for (chain_len in 2:n) { # length of the chain being multiplied
+    for (i in 1:(n - chain_len + 1)) {
+      j <- i + chain_len - 1
+ dp[i, j] <- Inf
+
+ # Try all possible split points
+ for (k in i:(j - 1)) {
+ # Cost = cost of left part + cost of right part + cost of multiplying them
+ cost <- dp[i, k] + dp[k + 1, j] + dimensions[i] * dimensions[k + 1] * dimensions[j + 1]
+
+ if (cost < dp[i, j]) {
+ dp[i, j] <- cost
+ split_table[i, j] <- k # Store optimal split point
+ }
+ }
+ }
+ }
+
+ # Generate optimal parenthesization
+ optimal_parentheses <- generate_parentheses(split_table, 1, n)
+
+ return(list(
+ min_cost = dp[1, n],
+ dp_table = dp,
+ optimal_parentheses = optimal_parentheses,
+ split_table = split_table
+ ))
+}
+
+# Recursive function to generate optimal parenthesization
+generate_parentheses <- function(split_table, i, j) {
+ if (i == j) {
+ return(paste0("A", i))
+ } else {
+ k <- split_table[i, j]
+ left <- generate_parentheses(split_table, i, k)
+ right <- generate_parentheses(split_table, k + 1, j)
+ return(paste0("(", left, " × ", right, ")"))
+ }
+}
+
+# Space-optimized version using only upper triangular matrix
+matrix_chain_optimized <- function(dimensions) {
+ #' Space optimized matrix chain multiplication
+ #' @param dimensions: Vector of matrix dimensions
+ #' @return: Minimum cost only
+
+ n <- length(dimensions) - 1
+
+ if (n <= 1) return(0)
+
+ # Use only upper triangular part of DP table
+ dp <- matrix(0, nrow = n, ncol = n)
+
+  for (chain_len in 2:n) {
+    for (i in 1:(n - chain_len + 1)) {
+      j <- i + chain_len - 1
+ dp[i, j] <- Inf
+
+ for (k in i:(j - 1)) {
+ cost <- dp[i, k] + dp[k + 1, j] + dimensions[i] * dimensions[k + 1] * dimensions[j + 1]
+ dp[i, j] <- min(dp[i, j], cost)
+ }
+ }
+ }
+
+ return(dp[1, n])
+}
+
+# Function to calculate actual multiplication cost for given parenthesization
+calculate_multiplication_cost <- function(dimensions, parentheses) {
+ #' Calculate the actual cost of multiplying matrices with given parenthesization
+ #' @param dimensions: Vector of matrix dimensions
+ #' @param parentheses: String representation of parenthesization
+ #' @return: Total cost of multiplication
+
+ # Helper to parse the parenthesization string into a tree
+ parse_parentheses <- function(s) {
+ s <- gsub(" ", "", s) # Remove spaces
+ # If it's a single matrix, e.g. "A1"
+ if (grepl("^A[0-9]+$", s)) {
+ idx <- as.integer(sub("A", "", s))
+ return(list(type = "leaf", idx = idx))
+ }
+ # Otherwise, find the main split
+ # Remove outer parentheses if present
+ if (substr(s, 1, 1) == "(" && substr(s, nchar(s), nchar(s)) == ")") {
+ s <- substr(s, 2, nchar(s) - 1)
+ }
+ # Find the split point for " × " at the top level
+ depth <- 0
+ for (i in seq_len(nchar(s))) {
+ ch <- substr(s, i, i)
+ if (ch == "(") depth <- depth + 1
+ if (ch == ")") depth <- depth - 1
+      # Look for "×" at depth 0 (spaces were stripped above, so the operator
+      # is a single character)
+      if (depth == 0 && ch == "×") {
+        left <- substr(s, 1, i - 1)
+        right <- substr(s, i + 1, nchar(s))
+ return(list(
+ type = "node",
+ left = parse_parentheses(left),
+ right = parse_parentheses(right)
+ ))
+ }
+ }
+ stop("Invalid parenthesization string")
+ }
+
+ # Helper to compute cost recursively
+ compute_cost <- function(node) {
+ if (node$type == "leaf") {
+ # Return dimensions for this matrix
+ idx <- node$idx
+ return(list(rows = dimensions[idx], cols = dimensions[idx+1], cost = 0))
+ } else {
+ left <- compute_cost(node$left)
+ right <- compute_cost(node$right)
+ # Multiply left and right matrices: left$rows x left$cols and right$rows x right$cols
+ if (left$cols != right$rows) stop("Incompatible matrix dimensions")
+ cost <- left$cost + right$cost + left$rows * left$cols * right$cols
+ return(list(rows = left$rows, cols = right$cols, cost = cost))
+ }
+ }
+
+ tree <- parse_parentheses(parentheses)
+ result <- compute_cost(tree)
+ return(result$cost)
+}
+
+# Function to find all possible optimal parenthesizations
+find_all_optimal_parentheses <- function(dimensions) {
+ #' Find all possible optimal parenthesizations
+ #' @param dimensions: Vector of matrix dimensions
+ #' @return: List of all optimal parenthesizations
+
+ n <- length(dimensions) - 1
+  if (n <= 1) return(if (n == 1) list("A1") else list())
+
+ result <- matrix_chain_multiplication(dimensions)
+ min_cost <- result$min_cost
+
+ all_parentheses <- list()
+
+ # Generate all possible parenthesizations and filter optimal ones
+ generate_all_parentheses <- function(i, j) {
+ if (i == j) {
+ return(list(paste0("A", i)))
+ }
+
+ all_ways <- list()
+ for (k in i:(j - 1)) {
+ left_ways <- generate_all_parentheses(i, k)
+ right_ways <- generate_all_parentheses(k + 1, j)
+
+ for (left in left_ways) {
+ for (right in right_ways) {
+ all_ways <- c(all_ways, list(paste0("(", left, " × ", right, ")")))
+ }
+ }
+ }
+ return(all_ways)
+ }
+
+ all_ways <- generate_all_parentheses(1, n)
+
+ # Helper function to compute cost of a parenthesization
+ compute_cost <- function(paren, dims) {
+ # Recursively compute cost based on parenthesization string
+ # paren: string like "(A1 × (A2 × A3))"
+ # dims: vector of dimensions
+ # Returns: cost (integer)
+
+ # Parse the parenthesization string into a tree structure
+ # Helper: extract indices of matrices in a parenthesization
+ extract_indices <- function(s) {
+ # Returns vector of matrix indices in the string
+ matches <- gregexpr("A[0-9]+", s)
+ as.integer(unlist(regmatches(s, matches)))
+ }
+
+ # Helper: recursively compute cost
+ recursive_cost <- function(s) {
+ # If s is a single matrix, cost is 0
+ indices <- extract_indices(s)
+ if (length(indices) == 1) {
+ return(list(cost = 0, left = indices[1], right = indices[1]))
+ }
+
+ # Find the main split: the outermost multiplication
+ # Remove outer parentheses
+ s_trim <- substring(s, 2, nchar(s) - 1)
+
+ # Find the split point (the × not inside parentheses)
+ depth <- 0
+ split_pos <- NULL
+ for (i in seq_len(nchar(s_trim))) {
+ ch <- substr(s_trim, i, i)
+ if (ch == "(") depth <- depth + 1
+ if (ch == ")") depth <- depth - 1
+ if (ch == "×" && depth == 0) {
+ split_pos <- i
+ break
+ }
+ }
+ if (is.null(split_pos)) {
+ # Should not happen
+ stop("Invalid parenthesization string: ", s)
+ }
+
+ left_str <- trimws(substring(s_trim, 1, split_pos - 1))
+ right_str <- trimws(substring(s_trim, split_pos + 1))
+
+ left <- recursive_cost(left_str)
+ right <- recursive_cost(right_str)
+
+ # The multiplication cost: dims[left$left] x dims[left$right+1] x dims[right$right+1]
+ cost_here <- dims[left$left] * dims[left$right + 1] * dims[right$right + 1]
+ total_cost <- left$cost + right$cost + cost_here
+ return(list(cost = total_cost, left = left$left, right = right$right))
+ }
+
+ recursive_cost(paren)$cost
+ }
+
+ # Compute cost for each parenthesization
+ costs <- sapply(all_ways, compute_cost, dims = dimensions)
+
+ # Find all parenthesizations with minimum cost
+ optimal_ways <- all_ways[costs == min_cost]
+
+  return(optimal_ways)
+}
+
+# Helper function to print DP table
+print_matrix_chain_dp <- function(dp_table, split_table, dimensions) {
+ n <- nrow(dp_table)
+
+ cat("DP Table for Matrix Chain Multiplication:\n")
+ cat("Matrix dimensions:", paste(dimensions, collapse = " × "), "\n")
+ cat("Number of matrices:", n, "\n\n")
+
+ # Print matrix indices
+ cat(" ")
+ for (j in 1:n) {
+ cat(sprintf("%4d", j))
+ }
+ cat("\n")
+ cat(paste(rep("-", 5 + 4 * n), collapse = ""), "\n")
+
+ for (i in 1:n) {
+ cat(sprintf("%2d | ", i))
+ for (j in 1:n) {
+ if (i <= j) {
+ cat(sprintf("%4d", dp_table[i, j]))
+ } else {
+ cat(" ")
+ }
+ }
+ cat("\n")
+ }
+ cat("\n")
+
+ # Print split table
+ cat("Split Table (optimal split points):\n")
+ cat(" ")
+ for (j in 1:n) {
+ cat(sprintf("%4d", j))
+ }
+ cat("\n")
+ cat(paste(rep("-", 5 + 4 * n), collapse = ""), "\n")
+
+ for (i in 1:n) {
+ cat(sprintf("%2d | ", i))
+ for (j in 1:n) {
+ if (i < j) {
+ cat(sprintf("%4d", split_table[i, j]))
+ } else {
+ cat(" ")
+ }
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# Helper function to show matrix multiplication steps
+show_multiplication_steps <- function(dimensions, parentheses) {
+ n <- length(dimensions) - 1
+
+ cat("Matrix Chain Multiplication Steps:\n")
+ cat("Matrix dimensions:", paste(dimensions, collapse = " × "), "\n")
+ cat("Optimal parenthesization:", parentheses, "\n\n")
+
+ for (i in 1:n) {
+ cat(sprintf("A%d: %d × %d matrix\n", i, dimensions[i], dimensions[i + 1]))
+ }
+ cat("\n")
+
+ # Show cost calculation
+ result <- matrix_chain_multiplication(dimensions)
+ cat("Minimum scalar multiplications:", result$min_cost, "\n")
+
+ # Show cost breakdown for small examples
+ if (n <= 4) {
+ cat("\nCost breakdown:\n")
+ for (i in 1:n) {
+ for (j in i:n) {
+ if (i < j) {
+ cat(sprintf("Cost[A%d..A%d] = %d\n", i, j, result$dp_table[i, j]))
+ }
+ }
+ }
+ }
+ cat("\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== Matrix Chain Multiplication Problem (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+cat("Test 1: Basic Example\n")
+dims1 <- c(1, 5, 4, 6, 2) # 4 matrices: 1×5, 5×4, 4×6, 6×2
+cat("Matrix dimensions:", paste(dims1, collapse = " × "), "\n")
+
+result1 <- matrix_chain_multiplication(dims1)
+print_matrix_chain_dp(result1$dp_table, result1$split_table, dims1)
+cat("Minimum cost:", result1$min_cost, "\n")
+cat("Optimal parenthesization:", result1$optimal_parentheses, "\n")
+show_multiplication_steps(dims1, result1$optimal_parentheses)
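+
+# Illustrative cross-check: re-evaluate the optimal parenthesization with the
+# standalone cost helper defined above; it should reproduce the DP minimum
+check_cost1 <- calculate_multiplication_cost(dims1, result1$optimal_parentheses)
+cat("Re-computed cost of optimal parenthesization:", check_cost1, "\n")
+cat("Matches DP minimum:", check_cost1 == result1$min_cost, "\n\n")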
+
+# Test 2: Optimized Version
+cat("Test 2: Space Optimized Version\n")
+min_cost_opt <- matrix_chain_optimized(dims1)
+cat("Minimum cost (Optimized):", min_cost_opt, "\n")
+cat("Verification: Both methods match:", result1$min_cost == min_cost_opt, "\n\n")
+
+# Test 3: Edge Cases
+cat("Test 3: Edge Cases\n")
+cat("Single matrix:", matrix_chain_multiplication(c(3, 4))$min_cost, "\n")
+cat("Two matrices:", matrix_chain_multiplication(c(3, 4, 5))$min_cost, "\n")
+cat("Empty dimensions:", matrix_chain_multiplication(c())$min_cost, "\n\n")
+
+# Test 4: Larger Example
+cat("Test 4: Larger Example (6 matrices)\n")
+dims_large <- c(30, 35, 15, 5, 10, 20, 25)
+cat("Matrix dimensions:", paste(dims_large, collapse = " × "), "\n")
+
+result_large <- matrix_chain_multiplication(dims_large)
+cat("Minimum cost:", result_large$min_cost, "\n")
+cat("Optimal parenthesization:", result_large$optimal_parentheses, "\n\n")
+
+# Test 5: Performance Comparison
+cat("Test 5: Performance Comparison (8 matrices)\n")
+set.seed(42)
+dims_perf <- sample(10:50, 9) # 8 matrices
+cat("Matrix dimensions:", paste(dims_perf, collapse = " × "), "\n")
+
+start_time <- Sys.time()
+result_std <- matrix_chain_multiplication(dims_perf)
+std_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+start_time <- Sys.time()
+result_opt <- matrix_chain_optimized(dims_perf)
+opt_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Standard DP result:", result_std$min_cost, "\n")
+cat("Optimized DP result:", result_opt, "\n")
+cat("Standard DP time:", sprintf("%.4f sec", std_time), "\n")
+cat("Optimized DP time:", sprintf("%.4f sec", opt_time), "\n")
+cat("Results match:", result_std$min_cost == result_opt, "\n\n")
+
+# Test 6: Multiple Optimal Solutions
+cat("Test 6: Multiple Optimal Solutions\n")
+dims_multiple <- c(2, 3, 2, 3, 2) # 4 matrices with multiple optimal solutions
+cat("Matrix dimensions:", paste(dims_multiple, collapse = " × "), "\n")
+
+result_multiple <- matrix_chain_multiplication(dims_multiple)
+cat("Minimum cost:", result_multiple$min_cost, "\n")
+cat("One optimal parenthesization:", result_multiple$optimal_parentheses, "\n")
+
+# Find all optimal parenthesizations
+all_optimal <- find_all_optimal_parentheses(dims_multiple)
+cat("Total optimal parenthesizations found:", length(all_optimal), "\n")
+for (i in seq_along(all_optimal)) {
+ cat("Option", i, ":", all_optimal[[i]], "\n")
+}
+cat("\n")
+
+# Test 7: Real-world Example - Computer Graphics
+cat("Test 7: Real-world Example - 3D Graphics Pipeline\n")
+# Typical 3D transformation pipeline: Model → View → Projection → Screen
+# Each transformation is a matrix multiplication
+graphics_dims <- c(4, 4, 4, 4, 4) # 4×4 transformation matrices
+cat("3D Graphics Pipeline (4×4 transformation matrices):\n")
+cat("Matrix dimensions:", paste(graphics_dims, collapse = " × "), "\n")
+
+graphics_result <- matrix_chain_multiplication(graphics_dims)
+cat("Minimum cost:", graphics_result$min_cost, "\n")
+cat("Optimal order:", graphics_result$optimal_parentheses, "\n")
+
+# Show the transformation pipeline
+cat("\nTransformation Pipeline:\n")
+cat("1. Model Matrix (4×4) - Object to World coordinates\n")
+cat("2. View Matrix (4×4) - World to Camera coordinates\n")
+cat("3. Projection Matrix (4×4) - Camera to Clip coordinates\n")
+cat("4. Viewport Matrix (4×4) - Clip to Screen coordinates\n")
+cat("Total operations saved with optimal order:", graphics_result$min_cost, "scalar multiplications\n\n")
+
+# Test 8: Cost Analysis
+cat("Test 8: Cost Analysis\n")
+dims_analysis <- c(10, 20, 30, 40, 50)
+cat("Matrix dimensions:", paste(dims_analysis, collapse = " × "), "\n")
+
+analysis_result <- matrix_chain_multiplication(dims_analysis)
+cat("Minimum cost:", analysis_result$min_cost, "\n")
+
+# Compare with naive left-to-right multiplication
+naive_cost <- 0
+# Simulate left-to-right multiplication, updating result matrix dimensions
+rows <- dims_analysis[1]
+cols <- dims_analysis[2]
+for (i in 2:(length(dims_analysis) - 1)) {
+ next_cols <- dims_analysis[i + 1]
+ naive_cost <- naive_cost + rows * cols * next_cols
+ cols <- next_cols
+}
+
+cat("Naive left-to-right cost:", naive_cost, "\n")
+cat("Savings with optimal order:", naive_cost - analysis_result$min_cost, "\n")
+cat("Percentage improvement:", sprintf("%.1f%%", (naive_cost - analysis_result$min_cost) / naive_cost * 100), "\n")
diff --git a/Desktop/open-source/R/dynamic_programming/minimum_path_sum.r b/Desktop/open-source/R/dynamic_programming/minimum_path_sum.r
new file mode 100644
index 00000000..15279100
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/minimum_path_sum.r
@@ -0,0 +1,373 @@
+# Minimum Path Sum Problem
+#
+# The Minimum Path Sum problem finds the minimum sum path from the top-left corner
+# to the bottom-right corner of a grid, moving only right or down at each step.
+# This is a classic dynamic programming problem that appears in various forms.
+#
+# Time Complexity: O(m * n) where m = number of rows, n = number of columns
+# Space Complexity: O(m * n) for DP table, O(min(m, n)) for optimized version
+#
+# Applications:
+# - Grid-based pathfinding algorithms
+# - Resource optimization in 2D grids
+# - Game development (pathfinding with costs)
+# - Network routing optimization
+# - Cost minimization in transportation
+
+# Basic DP solution for Minimum Path Sum
+minimum_path_sum <- function(grid) {
+ #' Find the minimum path sum from top-left to bottom-right corner
+ #' @param grid: 2D matrix of non-negative integers
+ #' @return: List containing minimum sum, path, and DP table
+
+ m <- nrow(grid)
+ n <- ncol(grid)
+
+ # Handle edge case
+ if (m == 0 || n == 0) {
+ return(list(
+ min_sum = 0,
+ path = c(),
+ dp_table = matrix(0, nrow = 1, ncol = 1)
+ ))
+ }
+
+ # Create DP table: dp[i, j] = minimum sum to reach position (i, j)
+ dp <- matrix(0, nrow = m, ncol = n)
+
+ # Initialize first row and column
+ dp[1, 1] <- grid[1, 1]
+
+ # Fill first row (can only move right)
+ if (n > 1) {
+ for (j in 2:n) {
+ dp[1, j] <- dp[1, j - 1] + grid[1, j]
+ }
+ }
+
+ # Fill first column (can only move down)
+ if (m > 1) {
+ for (i in 2:m) {
+ dp[i, 1] <- dp[i - 1, 1] + grid[i, 1]
+ }
+ }
+
+ # Fill remaining cells
+ if (m > 1 && n > 1) {
+ for (i in 2:m) {
+ for (j in 2:n) {
+ dp[i, j] <- min(dp[i - 1, j], dp[i, j - 1]) + grid[i, j]
+ }
+ }
+ }
+
+ # Backtrack to find the path
+ path <- list()
+ i <- m
+ j <- n
+
+ while (i > 1 || j > 1) {
+ path <- c(list(c(i, j)), path)
+
+ if (i == 1) {
+ # Can only move left
+ j <- j - 1
+ } else if (j == 1) {
+ # Can only move up
+ i <- i - 1
+ } else {
+ # Choose direction with minimum sum
+ if (dp[i - 1, j] < dp[i, j - 1]) {
+ i <- i - 1
+ } else {
+ j <- j - 1
+ }
+ }
+ }
+ path <- c(list(c(1, 1)), path)
+
+ return(list(
+ min_sum = dp[m, n],
+ path = path,
+ dp_table = dp
+ ))
+}
+
+# Space-optimized version using only 1D array
+minimum_path_sum_optimized <- function(grid) {
+ #' Space optimized minimum path sum using 1D array
+ #' @param grid: 2D matrix of non-negative integers
+ #' @return: Minimum path sum
+
+ m <- nrow(grid)
+ n <- ncol(grid)
+
+ if (m == 0 || n == 0) return(0)
+
+  # Keep the 1D DP vector along the smaller dimension
+  if (m <= n) {
+    # dp[i] = minimum path sum to cell (i, current column)
+    dp <- rep(0, m)
+    dp[1] <- grid[1, 1]
+
+    # Initialize first column (can only move down)
+    if (m > 1) {
+      for (i in 2:m) {
+        dp[i] <- dp[i - 1] + grid[i, 1]
+      }
+    }
+
+    # Sweep the remaining columns left to right
+    if (n > 1) {
+      for (j in 2:n) {
+        dp[1] <- dp[1] + grid[1, j]
+        if (m > 1) {
+          for (i in 2:m) {
+            dp[i] <- min(dp[i - 1], dp[i]) + grid[i, j]
+          }
+        }
+      }
+    }
+  } else {
+    # dp[j] = minimum path sum to cell (current row, j)
+    dp <- rep(0, n)
+    dp[1] <- grid[1, 1]
+
+    # Initialize first row (can only move right)
+    if (n > 1) {
+      for (j in 2:n) {
+        dp[j] <- dp[j - 1] + grid[1, j]
+      }
+    }
+
+    # Sweep the remaining rows top to bottom
+    for (i in 2:m) {
+      dp[1] <- dp[1] + grid[i, 1]
+      if (n > 1) {
+        for (j in 2:n) {
+          dp[j] <- min(dp[j - 1], dp[j]) + grid[i, j]
+        }
+      }
+    }
+  }
+
+ return(dp[length(dp)])
+}
+
+# Function to find all possible minimum paths
+find_all_minimum_paths <- function(grid) {
+ #' Find all possible paths that achieve the minimum sum
+ #' @param grid: 2D matrix of non-negative integers
+ #' @return: List of all minimum cost paths
+
+ m <- nrow(grid)
+ n <- ncol(grid)
+
+ if (m == 0 || n == 0) return(list())
+
+ # First compute the minimum sum
+ result <- minimum_path_sum(grid)
+ min_sum <- result$min_sum
+
+ all_paths <- list()
+
+ # Use recursive backtracking to find all paths with minimum sum
+ find_paths_recursive <- function(current_path, current_sum, i, j) {
+ current_sum <- current_sum + grid[i, j]
+
+ # If we've reached the bottom-right corner
+ if (i == m && j == n) {
+ if (current_sum == min_sum) {
+ all_paths <<- c(all_paths, list(c(current_path, list(c(i, j)))))
+ }
+      return()
+ }
+
+ # If current sum exceeds minimum, prune
+ if (current_sum > min_sum) {
+      return()
+ }
+
+ # Move right
+ if (j < n) {
+ find_paths_recursive(c(current_path, list(c(i, j))), current_sum, i, j + 1)
+ }
+
+ # Move down
+ if (i < m) {
+ find_paths_recursive(c(current_path, list(c(i, j))), current_sum, i + 1, j)
+ }
+ }
+
+ find_paths_recursive(list(), 0, 1, 1)
+ return(all_paths)
+}
+
+# Helper function to print DP table
+print_minimum_path_sum_dp <- function(dp_table, grid) {
+ m <- nrow(grid)
+ n <- ncol(grid)
+
+ cat("DP Table for Minimum Path Sum:\n")
+ cat("Grid:\n")
+ for (i in 1:m) {
+ cat(" ")
+ for (j in 1:n) {
+ cat(sprintf("%3d ", grid[i, j]))
+ }
+ cat("\n")
+ }
+ cat("\nDP Table:\n")
+ for (i in 1:m) {
+ cat(" ")
+ for (j in 1:n) {
+ cat(sprintf("%3d ", dp_table[i, j]))
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# Helper function to visualize path on grid
+visualize_path <- function(grid, path) {
+ m <- nrow(grid)
+ n <- ncol(grid)
+
+ cat("Path Visualization:\n")
+ cat("Grid with path marked (*):\n")
+
+ # Create a matrix to mark the path
+ path_matrix <- matrix(" ", nrow = m, ncol = n)
+
+ for (pos in path) {
+ path_matrix[pos[1], pos[2]] <- "*"
+ }
+
+ for (i in 1:m) {
+ cat(" ")
+ for (j in 1:n) {
+ if (path_matrix[i, j] == "*") {
+ cat(sprintf("%3s ", "*"))
+ } else {
+ cat(sprintf("%3d ", grid[i, j]))
+ }
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== Minimum Path Sum Problem (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+cat("Test 1: Basic Example\n")
+grid1 <- matrix(c(1, 3, 1, 1, 5, 1, 4, 2, 1), nrow = 3, ncol = 3, byrow = TRUE)
+cat("Grid:\n")
+print(grid1)
+
+result1 <- minimum_path_sum(grid1)
+print_minimum_path_sum_dp(result1$dp_table, grid1)
+cat("Minimum Path Sum:", result1$min_sum, "\n")
+cat("Path (row, col):", paste(sapply(result1$path, function(x) paste("(", x[1], ",", x[2], ")", sep="")), collapse = " -> "), "\n")
+visualize_path(grid1, result1$path)
+cat("\n")
+
+# Test 2: Optimized Version
+cat("Test 2: Space Optimized Version\n")
+min_sum_opt <- minimum_path_sum_optimized(grid1)
+cat("Minimum Path Sum (Optimized):", min_sum_opt, "\n")
+cat("Verification: Both methods match:", result1$min_sum == min_sum_opt, "\n\n")
+
+# Test 3: Single Row/Column Cases
+cat("Test 3: Edge Cases\n")
+cat("Single row grid:\n")
+grid_row <- matrix(c(1, 2, 3, 4, 5), nrow = 1)
+print(grid_row)
+result_row <- minimum_path_sum(grid_row)
+cat("Minimum sum:", result_row$min_sum, "\n\n")
+
+cat("Single column grid:\n")
+grid_col <- matrix(c(1, 2, 3, 4, 5), ncol = 1)
+print(grid_col)
+result_col <- minimum_path_sum(grid_col)
+cat("Minimum sum:", result_col$min_sum, "\n\n")
+
+# Test 4: Larger Grid
+cat("Test 4: Larger Grid (4x5)\n")
+# Set random seed for reproducibility in tests. The value 42 is chosen arbitrarily.
+SEED <- 42
+set.seed(SEED)
+grid_large <- matrix(sample(1:9, 20, replace = TRUE), nrow = 4, ncol = 5)
+cat("Grid:\n")
+print(grid_large)
+
+result_large <- minimum_path_sum(grid_large)
+cat("Minimum Path Sum:", result_large$min_sum, "\n")
+cat("Path length:", length(result_large$path), "steps\n")
+visualize_path(grid_large, result_large$path)
+cat("\n")
+
+# Test 5: Performance Comparison
+cat("Test 5: Performance Comparison (6x8 grid)\n")
+grid_perf <- matrix(sample(1:20, 48, replace = TRUE), nrow = 6, ncol = 8)
+
+result_std <- minimum_path_sum(grid_perf)
+result_opt <- minimum_path_sum_optimized(grid_perf)
+
+cat("Standard DP result:", result_std$min_sum, "\n")
+cat("Optimized DP result:", result_opt, "\n")
+cat("Results match:", result_std$min_sum == result_opt, "\n")
+
+# microbenchmark is an optional external dependency; skip the timing comparison
+# gracefully if it is not installed so the script still runs in base R
+if (requireNamespace("microbenchmark", quietly = TRUE)) {
+  mbm <- microbenchmark::microbenchmark(
+    std = minimum_path_sum(grid_perf),
+    opt = minimum_path_sum_optimized(grid_perf),
+    times = 100L
+  )
+  cat("Standard DP median time:", sprintf("%.6f sec", median(mbm$time[mbm$expr == "std"]) / 1e9), "\n")
+  cat("Optimized DP median time:", sprintf("%.6f sec", median(mbm$time[mbm$expr == "opt"]) / 1e9), "\n")
+} else {
+  cat("Package 'microbenchmark' not installed; skipping detailed timing\n")
+}
+cat("\n")
+
+# Test 6: Multiple Minimum Paths
+cat("Test 6: Multiple Minimum Paths\n")
+grid_multiple <- matrix(c(1, 2, 1, 1, 1, 1, 1, 1, 1), nrow = 3, ncol = 3, byrow = TRUE)
+cat("Grid:\n")
+print(grid_multiple)
+
+result_multiple <- minimum_path_sum(grid_multiple)
+cat("Minimum Path Sum:", result_multiple$min_sum, "\n")
+cat("One possible path:", paste(sapply(result_multiple$path, function(x) paste("(", x[1], ",", x[2], ")", sep="")), collapse = " -> "), "\n")
+
+# Find all minimum paths
+all_paths <- find_all_minimum_paths(grid_multiple)
+cat("Total number of minimum paths:", length(all_paths), "\n")
+for (i in seq_along(all_paths)) {
+ path_str <- paste(sapply(all_paths[[i]], function(x) paste("(", x[1], ",", x[2], ")", sep="")), collapse = " -> ")
+ path_sum <- sum(sapply(all_paths[[i]], function(x) grid_multiple[x[1], x[2]]))
+ cat("Path", i, ":", path_str, "(sum =", path_sum, ")\n")
+}
+cat("\n")
+
+# Test 7: Real-world Example - Cost Optimization
+cat("Test 7: Real-world Example - Transportation Cost Optimization\n")
+# Grid representing transportation costs between cities
+transport_grid <- matrix(c(
+ 2, 3, 4, 2, 1,
+ 1, 2, 1, 3, 2,
+ 3, 1, 2, 1, 4,
+ 2, 4, 1, 2, 3
+), nrow = 4, ncol = 5, byrow = TRUE)
+
+cat("Transportation Cost Grid:\n")
+print(transport_grid)
+
+transport_result <- minimum_path_sum(transport_grid)
+cat("Minimum transportation cost:", transport_result$min_sum, "\n")
+cat("Optimal route:", paste(sapply(transport_result$path, function(x) paste("City(", x[1], ",", x[2], ")", sep="")), collapse = " -> "), "\n")
+visualize_path(transport_grid, transport_result$path)
+
+# Calculate cost breakdown
+cat("Cost breakdown:\n")
+total_cost <- 0
+for (i in seq_along(transport_result$path)) {
+ pos <- transport_result$path[[i]]
+ cost <- transport_grid[pos[1], pos[2]]
+ total_cost <- total_cost + cost
+ cat(" Step", i, ": City(", pos[1], ",", pos[2], ") =", cost, "\n")
+}
+cat("Total cost verification:", total_cost, "\n")
diff --git a/Desktop/open-source/R/dynamic_programming/subset_sum.r b/Desktop/open-source/R/dynamic_programming/subset_sum.r
new file mode 100644
index 00000000..8d4cf6bb
--- /dev/null
+++ b/Desktop/open-source/R/dynamic_programming/subset_sum.r
@@ -0,0 +1,288 @@
+# Subset Sum Problem
+#
+# The Subset Sum problem determines whether there exists a subset of a given set
+# of positive integers that sums to a target value. This is a classic NP-complete
+# problem solved using dynamic programming.
+#
+# Time Complexity: O(n * sum) where n = number of elements, sum = target sum
+# Space Complexity: O(n * sum) for DP table, O(sum) for optimized version
+#
+# Applications:
+# - Partition problems in computer science
+# - Knapsack problem variations
+# - Resource allocation and optimization
+# - Cryptography and number theory
+# - Game theory and decision making
+
+# Basic DP solution for Subset Sum Problem
+subset_sum <- function(arr, target) {
+ #' Check if there exists a subset that sums to the target value
+ #' @param arr: Numeric vector of positive integers
+ #' @param target: Target sum value
+ #' @return: Boolean indicating if subset exists, along with DP table
+
+ n <- length(arr)
+
+ # Handle edge cases
+ if (n == 0) {
+ return(list(
+ exists = (target == 0),
+ dp_table = matrix(FALSE, nrow = 1, ncol = 1),
+ subset = c()
+ ))
+ }
+
+ if (target == 0) {
+ return(list(
+ exists = TRUE,
+ dp_table = matrix(TRUE, nrow = n + 1, ncol = 1),
+ subset = c()
+ ))
+ }
+
+ # Create DP table: dp[i, j] = TRUE if sum j can be achieved using first i elements
+ dp <- matrix(FALSE, nrow = n + 1, ncol = target + 1)
+
+ # Base case: sum 0 can always be achieved with empty subset
+ for (i in 1:(n + 1)) {
+ dp[i, 1] <- TRUE
+ }
+
+ # Fill DP table
+ for (i in 2:(n + 1)) {
+ for (j in 1:(target + 1)) {
+ current_sum <- j - 1 # Convert to 0-based indexing
+
+ # Don't include current element
+ dp[i, j] <- dp[i - 1, j]
+
+ # Include current element (if it doesn't exceed current sum)
+ if (arr[i - 1] <= current_sum) {
+ dp[i, j] <- dp[i, j] || dp[i - 1, j - arr[i - 1]]
+ }
+ }
+ }
+
+ # Backtrack to find one possible subset
+ subset <- c()
+ if (dp[n + 1, target + 1]) {
+ i <- n + 1
+ j <- target + 1
+
+ while (i > 1 && j > 1) {
+ # If current sum was achieved without including arr[i-1]
+ if (dp[i - 1, j]) {
+ i <- i - 1
+ } else {
+ # Current element was included
+ subset <- c(arr[i - 1], subset)
+ j <- j - arr[i - 1]
+ i <- i - 1
+ }
+ }
+ }
+
+ return(list(
+ exists = dp[n + 1, target + 1],
+ dp_table = dp,
+ subset = subset
+ ))
+}
+
+# Space-optimized version using only 1D array
+subset_sum_optimized <- function(arr, target) {
+ #' Space optimized subset sum using 1D array
+ #' @param arr: Numeric vector of positive integers
+ #' @param target: Target sum value
+ #' @return: Boolean indicating if subset exists
+
+ n <- length(arr)
+
+ if (n == 0) return(target == 0)
+ if (target == 0) return(TRUE)
+
+ dp <- rep(FALSE, target + 1)
+ dp[1] <- TRUE # sum 0 is always possible
+
+ for (i in 1:n) {
+ # Traverse from right to left to avoid overwriting needed values
+ for (j in target:1) {
+ if (arr[i] <= j) {
+ dp[j + 1] <- dp[j + 1] || dp[j - arr[i] + 1]
+ }
+ }
+ }
+
+ return(dp[target + 1])
+}
+
+# Function to find all subsets that sum to target
+find_all_subsets <- function(arr, target) {
+ #' Find all subsets that sum to the target value
+ #' @param arr: Numeric vector of positive integers
+ #' @param target: Target sum value
+ #' @return: List of subsets (each subset is a numeric vector) that sum to target
+
+ n <- length(arr)
+ results <- list()
+
+ # Helper recursive function
+ find_subsets_rec <- function(idx, current_subset, current_sum) {
+ if (current_sum == target) {
+ results <<- c(results, list(current_subset))
+ return()
+ }
+ if (idx > n || current_sum > target) {
+ return()
+ }
+ # Include arr[idx]
+ find_subsets_rec(idx + 1, c(current_subset, arr[idx]), current_sum + arr[idx])
+ # Exclude arr[idx]
+ find_subsets_rec(idx + 1, current_subset, current_sum)
+ }
+
+ find_subsets_rec(1, c(), 0)
+ return(results)
+}
+
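+# Helper for the budget-allocation example (Test 7): the largest achievable
+# subset sum that does not exceed `limit`, using the same 1D boolean DP as
+# subset_sum_optimized. This is a minimal sketch assuming that the "closest
+# possible sum" fallback below means the maximum subset sum <= budget.
+max_subset_sum_leq <- function(arr, limit) {
+  if (length(arr) == 0 || limit <= 0) return(0)
+  dp <- rep(FALSE, limit + 1)  # dp[s + 1] = TRUE if sum s is achievable
+  dp[1] <- TRUE                # sum 0 is always achievable
+  for (x in arr) {
+    if (x > limit) next
+    for (j in limit:x) {
+      if (dp[j - x + 1]) dp[j + 1] <- TRUE
+    }
+  }
+  return(max(which(dp)) - 1)   # largest achievable sum <= limit
+}
+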
+# Helper function to print DP table
+print_subset_sum_dp <- function(dp_table, arr, target) {
+ cat("DP Table for Subset Sum Problem:\n")
+ cat("Array:", paste(arr, collapse = ", "), "\n")
+ cat("Target Sum:", target, "\n\n")
+
+ # Print column headers (sums)
+ cat(" ")
+ cat(paste(sprintf("%4d", 0:target), collapse = " "))
+ cat("\n")
+ cat(paste(rep("-", 8 + 5 * (target + 1)), collapse = ""), "\n")
+
+ for (i in 1:nrow(dp_table)) {
+ if (i == 1) {
+ cat("Empty | ")
+ } else {
+ cat(sprintf("Elem%2d| ", i - 1))
+ }
+
+ for (j in 1:ncol(dp_table)) {
+ cat(sprintf("%4s", ifelse(dp_table[i, j], " T", " F")))
+ }
+ cat("\n")
+ }
+ cat("\n")
+}
+
+# ===========================
+# Example Usage & Testing
+# ===========================
+cat("=== Subset Sum Problem (Dynamic Programming) ===\n\n")
+
+# Test 1: Basic Example
+arr1 <- c(3, 34, 4, 12, 5, 2)
+target1 <- 9
+cat("Test 1: Basic Example\n")
+cat("Array:", paste(arr1, collapse = ", "), "\n")
+cat("Target Sum:", target1, "\n\n")
+
+result1 <- subset_sum(arr1, target1)
+print_subset_sum_dp(result1$dp_table, arr1, target1)
+cat("Subset exists:", result1$exists, "\n")
+if (result1$exists) {
+ cat("One possible subset:", paste(result1$subset, collapse = ", "), "\n")
+ cat("Sum verification:", sum(result1$subset), "\n")
+}
+cat("\n")
+
+# Test 2: Optimized Version
+cat("Test 2: Space Optimized Version\n")
+exists_opt <- subset_sum_optimized(arr1, target1)
+cat("Subset exists (Optimized):", exists_opt, "\n")
+cat("Verification: Both methods match:", result1$exists == exists_opt, "\n\n")
+
+# Test 3: No Solution Case
+cat("Test 3: No Solution Case\n")
+arr3 <- c(3, 34, 4, 12, 5, 2)
+target3 <- 30
+cat("Array:", paste(arr3, collapse = ", "), "\n")
+cat("Target Sum:", target3, "\n")
+
+result3 <- subset_sum(arr3, target3)
+cat("Subset exists:", result3$exists, "\n\n")
+
+# Test 4: Multiple Solutions
+cat("Test 4: Multiple Solutions\n")
+arr4 <- c(1, 2, 3, 4, 5)
+target4 <- 6
+cat("Array:", paste(arr4, collapse = ", "), "\n")
+cat("Target Sum:", target4, "\n")
+
+result4 <- subset_sum(arr4, target4)
+cat("Subset exists:", result4$exists, "\n")
+if (result4$exists) {
+ cat("One possible subset:", paste(result4$subset, collapse = ", "), "\n")
+
+ # Find all possible subsets
+ all_subsets <- find_all_subsets(arr4, target4)
+ cat("Total number of subsets:", length(all_subsets), "\n")
+ for (i in seq_along(all_subsets)) {
+ cat("Subset", i, ":", paste(all_subsets[[i]], collapse = ", "),
+ "(sum =", sum(all_subsets[[i]]), ")\n")
+ }
+}
+cat("\n")
+
+# Test 5: Edge Cases
+cat("Test 5: Edge Cases\n")
+cat("Empty array, target 0:", subset_sum(c(), 0)$exists, "\n")
+cat("Empty array, target 5:", subset_sum(c(), 5)$exists, "\n")
+cat("Array [1,2,3], target 0:", subset_sum(c(1, 2, 3), 0)$exists, "\n")
+cat("Array [5], target 5:", subset_sum(c(5), 5)$exists, "\n")
+cat("Array [5], target 3:", subset_sum(c(5), 3)$exists, "\n\n")
+
+# Test 6: Larger Dataset
+cat("Test 6: Larger Dataset (n=15)\n")
+set.seed(42)
+arr_large <- sample(1:20, 15)
+target_large <- 50
+cat("Array:", paste(arr_large, collapse = ", "), "\n")
+cat("Target Sum:", target_large, "\n")
+
+start_time <- Sys.time()
+result_large <- subset_sum(arr_large, target_large)
+dp_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+start_time <- Sys.time()
+exists_large_opt <- subset_sum_optimized(arr_large, target_large)
+opt_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Subset exists:", result_large$exists, "\n")
+cat("DP method time:", sprintf("%.4f sec", dp_time), "\n")
+cat("Optimized method time:", sprintf("%.4f sec", opt_time), "\n")
+cat("Results match:", result_large$exists == exists_large_opt, "\n")
+
+if (result_large$exists) {
+ cat("One possible subset:", paste(result_large$subset, collapse = ", "), "\n")
+ cat("Sum verification:", sum(result_large$subset), "\n")
+}
+cat("\n")
+
+# Test 7: Real-world Example - Budget Allocation
+cat("Test 7: Real-world Example - Budget Allocation\n")
+project_costs <- c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100)
+budget <- 150
+cat("Project costs:", paste(project_costs, collapse = ", "), "\n")
+cat("Available budget:", budget, "\n")
+
+budget_result <- subset_sum(project_costs, budget)
+cat("Exact budget allocation possible:", budget_result$exists, "\n")
+
+if (budget_result$exists) {
+ selected_projects <- budget_result$subset
+ cat("Selected projects (costs):", paste(selected_projects, collapse = ", "), "\n")
+ cat("Total cost:", sum(selected_projects), "\n")
+ cat("Remaining budget:", budget - sum(selected_projects), "\n")
+} else {
+  # Fall back to the largest achievable total that stays within the budget
+ closest_sum <- max_subset_sum_leq(project_costs, budget)
+ cat("Closest possible sum:", closest_sum, "\n")
+}
diff --git a/Desktop/open-source/R/graph_algorithms/bellman_ford_shortest_path.r b/Desktop/open-source/R/graph_algorithms/bellman_ford_shortest_path.r
new file mode 100644
index 00000000..f6d06a74
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/bellman_ford_shortest_path.r
@@ -0,0 +1,146 @@
+# Bellman-Ford Shortest Path Algorithm
+#
+# The Bellman-Ford algorithm computes shortest paths from a single source vertex to
+# all other vertices in a weighted graph. Unlike Dijkstra's algorithm, Bellman-Ford
+# supports graphs with negative edge weights and can detect negative-weight cycles.
+#
+# Time Complexity: O(V * E)
+# Space Complexity: O(V)
+#
+# Input: graph as an adjacency list where each entry is a list of edges with fields
+# `vertex` and `weight`, and `source` vertex index (integer)
+# Output: A list containing distances, predecessors, and a flag indicating whether
+# a negative cycle was detected
+
+bellman_ford_shortest_path <- function(graph, source) {
+ # Collect all vertices (numeric indices expected)
+ all_vertices <- unique(c(names(graph), unlist(lapply(graph, function(x) sapply(x, function(e) e$vertex)))))
+ # Convert to numeric vector
+ all_vertices <- as.numeric(all_vertices)
+ num_vertices <- max(all_vertices)
+
+ # Initialize distances and predecessors
+ distances <- rep(Inf, num_vertices)
+ predecessor <- rep(-1, num_vertices)
+
+ distances[source] <- 0
+
+ # Relax edges repeatedly (V-1 times)
+ for (i in 1:(num_vertices - 1)) {
+ updated <- FALSE
+ # Iterate all edges
+ for (u_char in names(graph)) {
+ u <- as.numeric(u_char)
+ for (edge in graph[[u_char]]) {
+ v <- edge$vertex
+ w <- edge$weight
+ if (distances[u] != Inf && distances[u] + w < distances[v]) {
+ distances[v] <- distances[u] + w
+ predecessor[v] <- u
+ updated <- TRUE
+ }
+ }
+ }
+ # If no update in this pass, we can stop early
+ if (!updated) break
+ }
+
+ # Check for negative-weight cycles: if we can still relax, there is a negative cycle
+ negative_cycle <- FALSE
+ for (u_char in names(graph)) {
+ u <- as.numeric(u_char)
+ for (edge in graph[[u_char]]) {
+ v <- edge$vertex
+ w <- edge$weight
+ if (distances[u] != Inf && distances[u] + w < distances[v]) {
+ negative_cycle <- TRUE
+ break
+ }
+ }
+ if (negative_cycle) break
+ }
+
+ return(list(
+ distances = distances,
+ predecessor = predecessor,
+ negative_cycle = negative_cycle
+ ))
+}
+
+# Helper to reconstruct the shortest path from source to target
+get_bellman_ford_path <- function(result, source, target) {
+ if (result$negative_cycle) {
+ return(list(path = NULL, distance = NA, message = "Negative-weight cycle detected; shortest path undefined"))
+ }
+
+ distances <- result$distances
+ predecessor <- result$predecessor
+
+ if (is.infinite(distances[target])) {
+ return(list(path = NULL, distance = Inf, message = "Target not reachable from source"))
+ }
+
+ path <- c()
+ current <- target
+ while (current != -1) {
+ path <- c(current, path)
+ if (current == source) break
+ current <- predecessor[current]
+  }
+
+ return(list(path = path, distance = distances[target]))
+}
+
+# Example usage and tests
+cat("=== Bellman-Ford Shortest Path Algorithm ===\n")
+
+# Example graph with negative edges but no negative cycle
+# Graph structure:
+# 1 -> 2 (6), 1 -> 3 (5), 1 -> 4 (5)
+# 2 -> 5 (-1)
+# 3 -> 2 (-2), 3 -> 5 (1)
+# 4 -> 3 (-2), 4 -> 6 (-1)
+# 5 -> 6 (3)
+# 6 -> (none)
+bf_graph <- list(
+ "1" = list(list(vertex = 2, weight = 6), list(vertex = 3, weight = 5), list(vertex = 4, weight = 5)),
+ "2" = list(list(vertex = 5, weight = -1)),
+ "3" = list(list(vertex = 2, weight = -2), list(vertex = 5, weight = 1)),
+ "4" = list(list(vertex = 3, weight = -2), list(vertex = 6, weight = -1)),
+ "5" = list(list(vertex = 6, weight = 3)),
+ "6" = list()
+)
+
+cat("Graph (adjacency list):\n")
+for (v in names(bf_graph)) {
+ edges <- bf_graph[[v]]
+ if (length(edges) > 0) {
+ edge_strs <- sapply(edges, function(e) paste0(e$vertex, "(", e$weight, ")"))
+ cat("Vertex", v, "-> [", paste(edge_strs, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", v, "-> []\n")
+ }
+}
+
+cat("\nRunning Bellman-Ford from vertex 1:\n")
+bf_result <- bellman_ford_shortest_path(bf_graph, 1)
+cat("Negative cycle detected:", bf_result$negative_cycle, "\n")
+
+cat("Distances from vertex 1:\n")
+for (i in 1:length(bf_result$distances)) {
+ d <- bf_result$distances[i]
+ if (is.infinite(d)) {
+ cat("To vertex", i, ": unreachable\n")
+ } else {
+ cat("To vertex", i, ": distance =", d, "\n")
+ }
+}
+
+cat("\nShortest path from 1 to 6:\n")
+path_info <- get_bellman_ford_path(bf_result, 1, 6)
+if (!is.null(path_info$path)) {
+ cat("Path:", paste(path_info$path, collapse = " -> "), "\n")
+ cat("Distance:", path_info$distance, "\n")
+} else {
+ cat(path_info$message, "\n")
+}
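+
+# Illustrative second example: a graph containing a negative-weight cycle
+# (2 -> 3 -> 2 with total weight -2), to exercise the detection flag
+neg_cycle_graph <- list(
+  "1" = list(list(vertex = 2, weight = 1)),
+  "2" = list(list(vertex = 3, weight = -3)),
+  "3" = list(list(vertex = 2, weight = 1))
+)
+
+cat("\nRunning Bellman-Ford on a graph with a negative cycle:\n")
+neg_result <- bellman_ford_shortest_path(neg_cycle_graph, 1)
+cat("Negative cycle detected:", neg_result$negative_cycle, "\n")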
diff --git a/Desktop/open-source/R/graph_algorithms/breadth_first_search.r b/Desktop/open-source/R/graph_algorithms/breadth_first_search.r
new file mode 100644
index 00000000..049fc4ef
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/breadth_first_search.r
@@ -0,0 +1,217 @@
+# Breadth-First Search (BFS) Algorithm
+#
+# BFS is a graph traversal algorithm that explores all vertices at the current depth
+# before moving to vertices at the next depth level. It uses a queue data structure.
+#
+# Time Complexity: O(V + E) where V is vertices and E is edges
+# Space Complexity: O(V) for the visited array and queue
+#
+# Input: An adjacency list representation of a graph and a starting vertex
+# Output: The order in which vertices are visited during BFS traversal
+
+# BFS function using queue (implemented with vector)
+breadth_first_search <- function(graph, start_vertex) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))
+
+ # Initialize visited array and queue
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ queue <- c(start_vertex)
+ result <- c()
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex from front of queue
+ vertex <- queue[1]
+ queue <- queue[-1]
+ result <- c(result, vertex)
+
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ queue <- c(queue, neighbor)
+ }
+ }
+ }
+ }
+
+ return(result)
+}
+
+# BFS to find shortest path between two vertices
+bfs_shortest_path <- function(graph, start_vertex, end_vertex) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))
+
+ # Initialize visited array, queue, and parent tracking
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ parent <- rep(-1, max(all_vertices))
+ names(parent) <- 1:max(all_vertices)
+ queue <- c(start_vertex)
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex from front of queue
+ vertex <- queue[1]
+ queue <- queue[-1]
+
+ # If we reached the target vertex, reconstruct path
+ if (vertex == end_vertex) {
+ path <- c()
+ current <- end_vertex
+
+ # Backtrack using parent array
+ while (current != -1) {
+ path <- c(current, path)
+ current <- parent[current]
+ }
+
+ return(list(
+ path = path,
+ distance = length(path) - 1
+ ))
+ }
+
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ parent[neighbor] <- vertex
+ queue <- c(queue, neighbor)
+ }
+ }
+ }
+ }
+
+ # No path found
+ return(list(
+ path = NULL,
+ distance = -1
+ ))
+}
+
+# BFS to find all vertices at a specific distance
+bfs_vertices_at_distance <- function(graph, start_vertex, target_distance) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))
+
+ # Initialize visited array, queue with distances
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ queue <- list(list(vertex = start_vertex, distance = 0))
+ vertices_at_distance <- c()
+
+ # Mark starting vertex as visited
+ visited[start_vertex] <- TRUE
+
+ while (length(queue) > 0) {
+ # Dequeue vertex with distance from front of queue
+ current <- queue[[1]]
+ queue <- queue[-1]
+ vertex <- current$vertex
+ distance <- current$distance
+
+ # If current distance matches target, add to result
+ if (distance == target_distance) {
+ vertices_at_distance <- c(vertices_at_distance, vertex)
+ }
+
+ # Don't explore further if we've reached target distance
+ if (distance < target_distance) {
+ # Add all unvisited neighbors to queue
+ if (as.character(vertex) %in% names(graph)) {
+ for (neighbor in graph[[as.character(vertex)]]) {
+ if (!visited[neighbor]) {
+ visited[neighbor] <- TRUE
+ queue <- c(queue, list(list(vertex = neighbor, distance = distance + 1)))
+ }
+ }
+ }
+ }
+ }
+
+ return(vertices_at_distance)
+}
+
+# Example usage and testing
+cat("=== Breadth-First Search (BFS) Algorithm ===\n")
+
+# Create a sample graph as adjacency list
+# Graph structure:
+# 1
+# / \
+# 2 3
+# / \ \
+# 4 5 6
+graph <- list(
+ "1" = c(2, 3),
+ "2" = c(4, 5),
+ "3" = c(6),
+ "4" = c(),
+ "5" = c(),
+ "6" = c()
+)
+
+cat("Graph structure (adjacency list):\n")
+for (vertex in names(graph)) {
+ cat("Vertex", vertex, "-> [", paste(graph[[vertex]], collapse = ", "), "]\n")
+}
+
+# Test BFS traversal
+cat("\nBFS starting from vertex 1:\n")
+result <- breadth_first_search(graph, 1)
+cat("Traversal order:", paste(result, collapse = " -> "), "\n")
+
+# Test BFS from different starting vertex
+cat("\nBFS starting from vertex 2:\n")
+result_from_2 <- breadth_first_search(graph, 2)
+cat("Traversal order:", paste(result_from_2, collapse = " -> "), "\n")
+
+# Test shortest path finding
+cat("\n=== Shortest Path Finding ===\n")
+path_result <- bfs_shortest_path(graph, 1, 5)
+if (!is.null(path_result$path)) {
+ cat("Shortest path from 1 to 5:", paste(path_result$path, collapse = " -> "), "\n")
+ cat("Distance:", path_result$distance, "\n")
+} else {
+ cat("No path found from 1 to 5\n")
+}
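+
+# Illustrative: a target with no path (vertex 4 has no outgoing edges in this
+# adjacency list, so nothing is reachable from it)
+no_path <- bfs_shortest_path(graph, 4, 1)
+cat("Path from 4 to 1 found:", !is.null(no_path$path), "(distance =", no_path$distance, ")\n")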
+
+# Test vertices at specific distance
+cat("\n=== Vertices at Specific Distance ===\n")
+vertices_dist_2 <- bfs_vertices_at_distance(graph, 1, 2)
+cat("Vertices at distance 2 from vertex 1:", paste(vertices_dist_2, collapse = ", "), "\n")
+
+# Example with a more complex connected graph
+cat("\n=== Example with More Complex Graph ===\n")
+complex_graph <- list(
+ "1" = c(2, 3),
+ "2" = c(1, 4, 5),
+ "3" = c(1, 6),
+ "4" = c(2, 7),
+ "5" = c(2, 8),
+ "6" = c(3, 9),
+ "7" = c(4),
+ "8" = c(5),
+ "9" = c(6)
+)
+
+cat("Complex graph BFS starting from vertex 1:\n")
+complex_result <- breadth_first_search(complex_graph, 1)
+cat("Traversal order:", paste(complex_result, collapse = " -> "), "\n")
+
+# Test shortest path in complex graph
+path_complex <- bfs_shortest_path(complex_graph, 1, 9)
+if (!is.null(path_complex$path)) {
+ cat("Shortest path from 1 to 9:", paste(path_complex$path, collapse = " -> "), "\n")
+ cat("Distance:", path_complex$distance, "\n")
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/graph_algorithms/bridge_detector.r b/Desktop/open-source/R/graph_algorithms/bridge_detector.r
new file mode 100644
index 00000000..ffd6eeac
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/bridge_detector.r
@@ -0,0 +1,226 @@
+# bridge_detector.r
+# Bridge Detection Algorithm in R using Tarjan's Algorithm
+# Finds all critical edges (bridges) in an undirected graph.
+# A bridge is an edge whose removal increases the number of connected components.
+#
+# Algorithm details:
+# - Uses DFS traversal with discovery time and low-link values
+# - disc[v]: Discovery time of vertex v in DFS
+# - low[v]: Earliest discovered vertex reachable from v's subtree
+# - Bridge condition: For edge (u,v), if low[v] > disc[u], then (u,v) is a bridge
+# - Time complexity: O(V + E) where V = vertices, E = edges
+# - Space complexity: O(V) for visited, discovery, and low-link arrays
+
+library(R6)
+
+BridgeDetector <- R6Class(
+ "BridgeDetector",
+
+ public = list(
+ vertices = NULL,
+ graph = NULL,
+ time_counter = NULL,
+
+ initialize = function(n_vertices) {
+ "Initialize the bridge detector with specified number of vertices"
+ if (!is.numeric(n_vertices) || n_vertices < 0 || n_vertices != round(n_vertices)) {
+ stop("Number of vertices must be a non-negative integer")
+ }
+ self$vertices <- n_vertices
+ self$graph <- vector("list", n_vertices)
+ self$time_counter <- 0
+
+ for (i in seq_len(n_vertices)) {
+ self$graph[[i]] <- integer(0)
+ }
+ },
+
+ add_edge = function(u, v) {
+ "Add an undirected edge between vertices u and v (0-indexed)"
+ if (!is.numeric(u) || !is.numeric(v) ||
+ u < 0 || v < 0 ||
+ u >= self$vertices || v >= self$vertices ||
+ u != round(u) || v != round(v)) {
+ stop("Vertex indices must be integers in range [0, n_vertices-1]")
+ }
+ u_idx <- u + 1
+ v_idx <- v + 1
+ self$graph[[u_idx]] <- c(self$graph[[u_idx]], v_idx)
+ self$graph[[v_idx]] <- c(self$graph[[v_idx]], u_idx)
+ },
+
+ find_bridges = function() {
+ "Find all bridges in the graph using Tarjan's algorithm"
+ visited <- rep(FALSE, self$vertices)
+ disc <- rep(Inf, self$vertices)
+ low <- rep(Inf, self$vertices)
+ parent <- rep(-1, self$vertices)
+ bridges <- list()
+
+ self$time_counter <- 0
+
+ for (v in seq_len(self$vertices)) {
+ if (!visited[v]) {
+ res <- private$dfs_bridge(v, visited, disc, low, parent, bridges)
+ visited <- res$visited
+ disc <- res$disc
+ low <- res$low
+ parent <- res$parent
+ bridges <- res$bridges
+ }
+ }
+ return(bridges)
+ },
+
+ print_graph = function() {
+ "Print adjacency list of the graph (0-indexed)"
+ cat("Graph Adjacency List:\n")
+ for (i in seq_len(self$vertices)) {
+ neighbors <- if (length(self$graph[[i]]) > 0) {
+ paste(self$graph[[i]] - 1, collapse = ", ")
+ } else {
+ "none"
+ }
+ cat(sprintf("Vertex %d: %s\n", i - 1, neighbors))
+ }
+ }
+ ),
+
+ private = list(
+ dfs_bridge = function(u, visited, disc, low, parent, bridges) {
+ visited[u] <- TRUE
+ disc[u] <- self$time_counter
+ low[u] <- self$time_counter
+ self$time_counter <- self$time_counter + 1
+
+ for (v_idx in self$graph[[u]]) {
+ if (!visited[v_idx]) {
+ parent[v_idx] <- u
+ res <- private$dfs_bridge(v_idx, visited, disc, low, parent, bridges)
+ visited <- res$visited
+ disc <- res$disc
+ low <- res$low
+ parent <- res$parent
+ bridges <- res$bridges
+
+ low[u] <- min(low[u], low[v_idx])
+
+ if (low[v_idx] > disc[u]) {
+ bridges[[length(bridges) + 1]] <- c(u - 1, v_idx - 1)
+ }
+ } else if (v_idx != parent[u]) {
+ low[u] <- min(low[u], disc[v_idx])
+ }
+ }
+
+ return(list(
+ visited = visited,
+ disc = disc,
+ low = low,
+ parent = parent,
+ bridges = bridges
+ ))
+ }
+ )
+)
+
+# Demonstration
+demonstrate_bridge_detection <- function() {
+ cat("=== Bridge Detection Algorithm Demo ===\n\n")
+
+ # Example 1
+ cat("Example 1: Simple network with bridges\n")
+ cat("Graph: 0-1-2-3\n")
+ cat(" | |\n")
+ cat(" 4 5\n\n")
+
+ detector1 <- BridgeDetector$new(6)
+ detector1$add_edge(0, 1)
+ detector1$add_edge(1, 2)
+ detector1$add_edge(2, 3)
+ detector1$add_edge(0, 4)
+ detector1$add_edge(2, 5)
+
+ bridges1 <- detector1$find_bridges()
+ cat("Bridges found:\n")
+ for (b in bridges1) {
+ cat(sprintf(" (%d, %d)\n", b[1], b[2]))
+ }
+ cat("All edges are critical - removing any disconnects the network.\n\n")
+
+ # Example 2
+ cat("Example 2: Network with cycle (no bridges)\n")
+ cat("Graph: 0-1-2\n")
+ cat(" | |\n")
+ cat(" +---+\n\n")
+
+ detector2 <- BridgeDetector$new(3)
+ detector2$add_edge(0, 1)
+ detector2$add_edge(1, 2)
+ detector2$add_edge(2, 0)
+
+ bridges2 <- detector2$find_bridges()
+ if (length(bridges2) == 0) {
+ cat("Bridges found: None\n")
+ cat("The cycle provides redundancy - no single edge is critical.\n\n")
+ }
+
+ # Example 3
+ cat("Example 3: Complex network topology\n")
+ detector3 <- BridgeDetector$new(7)
+ detector3$add_edge(0, 1)
+ detector3$add_edge(1, 2)
+ detector3$add_edge(2, 0)
+ detector3$add_edge(1, 3)
+ detector3$add_edge(3, 4)
+ detector3$add_edge(4, 5)
+ detector3$add_edge(5, 6)
+ detector3$add_edge(6, 4)
+
+ bridges3 <- detector3$find_bridges()
+ cat("Bridges found:\n")
+ for (b in bridges3) {
+ cat(sprintf(" (%d, %d)\n", b[1], b[2]))
+ }
+ cat("Edge (1,3) connects two robust sub-networks.\n\n")
+
+ # Example 4: Testing print_graph
+ cat("Example 4: Viewing graph structure\n")
+ detector4 <- BridgeDetector$new(4)
+ detector4$add_edge(0, 1)
+ detector4$add_edge(1, 2)
+ detector4$add_edge(2, 3)
+ detector4$print_graph()
+ cat("\n")
+
+ # Example 5: Edge cases
+ cat("Example 5: Edge cases\n")
+
+ # Empty graph
+ detector5 <- BridgeDetector$new(0)
+ bridges5 <- detector5$find_bridges()
+ cat("Empty graph bridges: ", length(bridges5), "\n")
+
+ # Single edge
+ detector6 <- BridgeDetector$new(2)
+ detector6$add_edge(0, 1)
+ bridges6 <- detector6$find_bridges()
+ cat("Single edge graph bridges:\n")
+ for (b in bridges6) {
+ cat(sprintf(" (%d, %d)\n", b[1], b[2]))
+ }
+
+ # Disconnected components
+ detector7 <- BridgeDetector$new(4)
+ detector7$add_edge(0, 1)
+ detector7$add_edge(2, 3)
+ bridges7 <- detector7$find_bridges()
+ cat("Disconnected components bridges:\n")
+ for (b in bridges7) {
+ cat(sprintf(" (%d, %d)\n", b[1], b[2]))
+ }
+ cat("\n")
+}
+
+# Run demo
+demonstrate_bridge_detection()
\ No newline at end of file
diff --git a/Desktop/open-source/R/graph_algorithms/depth_first_search.r b/Desktop/open-source/R/graph_algorithms/depth_first_search.r
new file mode 100644
index 00000000..70532463
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/depth_first_search.r
@@ -0,0 +1,138 @@
+# Depth-First Search (DFS) Algorithm
+#
+# DFS is a graph traversal algorithm that explores as far as possible along each branch
+# before backtracking. It uses a stack data structure (implemented via recursion here).
+#
+# Time Complexity: O(V + E) where V is vertices and E is edges
+# Space Complexity: O(V) for the visited array and recursion stack
+#
+# Input: An adjacency list representation of a graph and a starting vertex
+# Output: The order in which vertices are visited during DFS traversal
+
+# Recursive DFS helper
+# Returns both the visited vector and the traversal order so that visited
+# state propagates back up the recursion; otherwise R's copy-on-modify
+# semantics would lose it and vertices could be revisited in cyclic graphs.
+dfs_recursive <- function(graph, vertex, visited, result) {
+  # Mark current vertex as visited
+  visited[vertex] <- TRUE
+  result <- c(result, vertex)
+
+  # Visit all unvisited adjacent vertices
+  if (as.character(vertex) %in% names(graph)) {
+    for (neighbor in graph[[as.character(vertex)]]) {
+      if (!visited[neighbor]) {
+        rec <- dfs_recursive(graph, neighbor, visited, result)
+        visited <- rec$visited
+        result <- rec$result
+      }
+    }
+  }
+
+  return(list(visited = visited, result = result))
+}
+
+# Main DFS function
+depth_first_search <- function(graph, start_vertex) {
+ # Get all vertices in the graph
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))
+
+ # Initialize visited array
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+
+  # Perform DFS starting from the given vertex; keep only the traversal order
+  result <- dfs_recursive(graph, start_vertex, visited, c())$result
+
+ return(result)
+}
+
+# Iterative DFS function using explicit stack
+dfs_iterative <- function(graph, start_vertex) {
+  # Get all vertices in the graph (as numbers, for indexing the visited vector)
+  all_vertices <- as.numeric(unique(c(names(graph), unlist(graph))))
+
+ # Initialize visited array and stack
+ visited <- rep(FALSE, max(all_vertices))
+ names(visited) <- 1:max(all_vertices)
+ stack <- c(start_vertex)
+ result <- c()
+
+ while (length(stack) > 0) {
+ # Pop vertex from stack
+ vertex <- stack[length(stack)]
+ stack <- stack[-length(stack)]
+
+ if (!visited[vertex]) {
+ # Mark as visited and add to result
+ visited[vertex] <- TRUE
+ result <- c(result, vertex)
+
+ # Add all unvisited neighbors to stack (in reverse order to maintain left-to-right traversal)
+ if (as.character(vertex) %in% names(graph)) {
+ neighbors <- graph[[as.character(vertex)]]
+ for (neighbor in rev(neighbors)) {
+ if (!visited[neighbor]) {
+ stack <- c(stack, neighbor)
+ }
+ }
+ }
+ }
+ }
+
+ return(result)
+}
+
+# Example usage and testing
+cat("=== Depth-First Search (DFS) Algorithm ===\n")
+
+# Create a sample graph as adjacency list
+# Graph structure:
+# 1
+# / \
+# 2 3
+# / \ \
+# 4 5 6
+graph <- list(
+ "1" = c(2, 3),
+ "2" = c(4, 5),
+ "3" = c(6),
+ "4" = c(),
+ "5" = c(),
+ "6" = c()
+)
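+
+# Expected DFS order from vertex 1 for this graph (for reference): 1 -> 2 -> 4 -> 5 -> 3 -> 6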
+
+cat("Graph structure (adjacency list):\n")
+for (vertex in names(graph)) {
+ cat("Vertex", vertex, "-> [", paste(graph[[vertex]], collapse = ", "), "]\n")
+}
+
+# Test recursive DFS
+cat("\nRecursive DFS starting from vertex 1:\n")
+result_recursive <- depth_first_search(graph, 1)
+cat("Traversal order:", paste(result_recursive, collapse = " -> "), "\n")
+
+# Test iterative DFS
+cat("\nIterative DFS starting from vertex 1:\n")
+result_iterative <- dfs_iterative(graph, 1)
+cat("Traversal order:", paste(result_iterative, collapse = " -> "), "\n")
+
+# Test with different starting vertex
+cat("\nRecursive DFS starting from vertex 2:\n")
+result_from_2 <- depth_first_search(graph, 2)
+cat("Traversal order:", paste(result_from_2, collapse = " -> "), "\n")
+
+# Example with a more complex graph (with cycles)
+cat("\n=== Example with Cyclic Graph ===\n")
+cyclic_graph <- list(
+ "1" = c(2, 3),
+ "2" = c(1, 4),
+ "3" = c(1, 5),
+ "4" = c(2, 6),
+ "5" = c(3, 6),
+ "6" = c(4, 5)
+)
+
+cat("Cyclic graph structure:\n")
+for (vertex in names(cyclic_graph)) {
+ cat("Vertex", vertex, "-> [", paste(cyclic_graph[[vertex]], collapse = ", "), "]\n")
+}
+
+cat("\nDFS on cyclic graph starting from vertex 1:\n")
+cyclic_result <- depth_first_search(cyclic_graph, 1)
+cat("Traversal order:", paste(cyclic_result, collapse = " -> "), "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/graph_algorithms/dijkstra_shortest_path.r b/Desktop/open-source/R/graph_algorithms/dijkstra_shortest_path.r
new file mode 100644
index 00000000..c9174e2d
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/dijkstra_shortest_path.r
@@ -0,0 +1,258 @@
+# Dijkstra's Shortest Path Algorithm
+#
+# Dijkstra's algorithm finds the shortest path between a source vertex and all other vertices
+# in a weighted graph with non-negative edge weights. It uses a greedy approach with a priority queue.
+#
+# Time Complexity: O((V + E) log V) with binary heap, O(V^2) with simple array
+# Space Complexity: O(V) for distance and visited arrays
+#
+# Input: A weighted graph represented as adjacency list with weights, and a source vertex
+# Output: Shortest distances from source to all vertices and the paths
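+#
+# Core relaxation step (for reference): for each edge (u, v) with weight w,
+#   if dist[u] + w < dist[v], set dist[v] <- dist[u] + w and previous[v] <- u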
+
+# Priority queue implementation using simple vector (for educational purposes)
+# In production, use more efficient data structures
+create_priority_queue <- function() {
+ list(
+ elements = data.frame(vertex = integer(0), distance = numeric(0)),
+ size = 0
+ )
+}
+
+# Insert element into priority queue
+pq_insert <- function(pq, vertex, distance) {
+ pq$elements <- rbind(pq$elements, data.frame(vertex = vertex, distance = distance))
+ pq$size <- pq$size + 1
+ return(pq)
+}
+
+# Extract minimum element from priority queue
+pq_extract_min <- function(pq) {
+ if (pq$size == 0) {
+ return(list(pq = pq, min_element = NULL))
+ }
+
+ min_idx <- which.min(pq$elements$distance)
+ min_element <- pq$elements[min_idx, ]
+ pq$elements <- pq$elements[-min_idx, ]
+ pq$size <- pq$size - 1
+
+ return(list(pq = pq, min_element = min_element))
+}
+
+# Check if priority queue is empty
+pq_is_empty <- function(pq) {
+ return(pq$size == 0)
+}
+
+# Main Dijkstra's algorithm implementation
+dijkstra_shortest_path <- function(graph, source) {
+  # Get all vertices in the graph (adjacency-list names plus all edge targets)
+  all_vertices <- unique(c(as.numeric(names(graph)),
+                           unlist(lapply(graph, function(x) sapply(x, function(e) e$vertex)))))
+  num_vertices <- max(all_vertices)
+
+ # Initialize distances and previous vertices
+ distances <- rep(Inf, num_vertices)
+ previous <- rep(-1, num_vertices)
+ visited <- rep(FALSE, num_vertices)
+
+ # Set source distance to 0
+ distances[source] <- 0
+
+ # Create priority queue and add source
+ pq <- create_priority_queue()
+ pq <- pq_insert(pq, source, 0)
+
+ while (!pq_is_empty(pq)) {
+ # Extract vertex with minimum distance
+ result <- pq_extract_min(pq)
+ pq <- result$pq
+ current <- result$min_element
+
+ if (is.null(current)) break
+
+ u <- current$vertex
+
+ # Skip if already visited
+ if (visited[u]) next
+
+ # Mark as visited
+ visited[u] <- TRUE
+
+ # Update distances to neighbors
+ if (as.character(u) %in% names(graph)) {
+ for (edge in graph[[as.character(u)]]) {
+ v <- edge$vertex
+ weight <- edge$weight
+
+ # Relaxation step
+ if (!visited[v] && distances[u] + weight < distances[v]) {
+ distances[v] <- distances[u] + weight
+ previous[v] <- u
+ pq <- pq_insert(pq, v, distances[v])
+ }
+ }
+ }
+ }
+
+ return(list(
+ distances = distances,
+ previous = previous
+ ))
+}
+
+# Reconstruct shortest path from source to target
+get_shortest_path <- function(dijkstra_result, source, target) {
+ previous <- dijkstra_result$previous
+ distances <- dijkstra_result$distances
+
+ # Check if target is reachable
+ if (distances[target] == Inf) {
+ return(list(
+ path = NULL,
+ distance = Inf
+ ))
+ }
+
+ # Reconstruct path by backtracking
+ path <- c()
+ current <- target
+
+ while (current != -1) {
+ path <- c(current, path)
+ current <- previous[current]
+ }
+
+ return(list(
+ path = path,
+ distance = distances[target]
+ ))
+}
+
+# Find shortest paths to all vertices
+get_all_shortest_paths <- function(dijkstra_result, source) {
+ distances <- dijkstra_result$distances
+ previous <- dijkstra_result$previous
+ paths <- list()
+
+ for (target in 1:length(distances)) {
+ if (distances[target] != Inf) {
+ path_result <- get_shortest_path(dijkstra_result, source, target)
+ paths[[as.character(target)]] <- path_result
+ }
+ }
+
+ return(paths)
+}
+
+# Example usage and testing
+cat("=== Dijkstra's Shortest Path Algorithm ===\n")
+
+# Create a weighted graph as adjacency list
+# Graph structure with weights:
+# 1
+# / \
+# 4/ \2
+# / \
+# 2 3
+# |3 /1
+# | /
+# 4-----5
+# 2
+weighted_graph <- list(
+ "1" = list(
+ list(vertex = 2, weight = 4),
+ list(vertex = 3, weight = 2)
+ ),
+ "2" = list(
+ list(vertex = 4, weight = 3)
+ ),
+ "3" = list(
+ list(vertex = 5, weight = 1)
+ ),
+ "4" = list(
+ list(vertex = 5, weight = 2)
+ ),
+ "5" = list()
+)
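+
+# Expected shortest distances from vertex 1 for this graph (for reference):
+#   to 2 = 4, to 3 = 2, to 4 = 7 (via 1 -> 2 -> 4), to 5 = 3 (via 1 -> 3 -> 5)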
+
+cat("Weighted graph structure:\n")
+for (vertex in names(weighted_graph)) {
+ edges <- weighted_graph[[vertex]]
+ if (length(edges) > 0) {
+ edge_strs <- sapply(edges, function(e) paste0(e$vertex, "(", e$weight, ")"))
+ cat("Vertex", vertex, "-> [", paste(edge_strs, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", vertex, "-> []\n")
+ }
+}
+
+# Run Dijkstra's algorithm from vertex 1
+cat("\nRunning Dijkstra's algorithm from vertex 1:\n")
+result <- dijkstra_shortest_path(weighted_graph, 1)
+
+# Display shortest distances
+cat("Shortest distances from vertex 1:\n")
+for (i in 1:length(result$distances)) {
+ if (result$distances[i] != Inf) {
+ cat("To vertex", i, ": distance =", result$distances[i], "\n")
+ }
+}
+
+# Get shortest path to specific vertex
+cat("\nShortest path from 1 to 5:\n")
+path_to_5 <- get_shortest_path(result, 1, 5)
+if (!is.null(path_to_5$path)) {
+ cat("Path:", paste(path_to_5$path, collapse = " -> "), "\n")
+ cat("Distance:", path_to_5$distance, "\n")
+}
+
+# Get all shortest paths
+cat("\nAll shortest paths from vertex 1:\n")
+all_paths <- get_all_shortest_paths(result, 1)
+for (target in names(all_paths)) {
+ path_info <- all_paths[[target]]
+ cat("To vertex", target, ": ", paste(path_info$path, collapse = " -> "),
+ " (distance:", path_info$distance, ")\n")
+}
+
+# Example with a more complex graph
+cat("\n=== More Complex Weighted Graph Example ===\n")
+complex_weighted_graph <- list(
+ "1" = list(
+ list(vertex = 2, weight = 7),
+ list(vertex = 3, weight = 9),
+ list(vertex = 6, weight = 14)
+ ),
+ "2" = list(
+ list(vertex = 3, weight = 10),
+ list(vertex = 4, weight = 15)
+ ),
+ "3" = list(
+ list(vertex = 4, weight = 11),
+ list(vertex = 6, weight = 2)
+ ),
+ "4" = list(
+ list(vertex = 5, weight = 6)
+ ),
+ "5" = list(),
+ "6" = list(
+ list(vertex = 5, weight = 9)
+ )
+)
+
+cat("Complex weighted graph from vertex 1:\n")
+complex_result <- dijkstra_shortest_path(complex_weighted_graph, 1)
+
+cat("Shortest distances:\n")
+for (i in 1:length(complex_result$distances)) {
+ if (complex_result$distances[i] != Inf) {
+ cat("To vertex", i, ": distance =", complex_result$distances[i], "\n")
+ }
+}
+
+# Shortest path to vertex 5
+path_to_5_complex <- get_shortest_path(complex_result, 1, 5)
+if (!is.null(path_to_5_complex$path)) {
+ cat("Shortest path from 1 to 5:", paste(path_to_5_complex$path, collapse = " -> "), "\n")
+ cat("Distance:", path_to_5_complex$distance, "\n")
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/graph_algorithms/floyd_warshall.r b/Desktop/open-source/R/graph_algorithms/floyd_warshall.r
new file mode 100644
index 00000000..3f7fb199
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/floyd_warshall.r
@@ -0,0 +1,285 @@
+# Floyd-Warshall Algorithm Implementation in R
+# Finds shortest paths between all pairs of vertices in a weighted graph
+# Can handle negative edge weights, but not negative cycles
+# Time complexity: O(V^3) where V is number of vertices
+# Space complexity: O(V^2) for distance and predecessor matrices
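+#
+# Core recurrence (for reference): for every intermediate vertex k,
+#   dist[i, j] <- min(dist[i, j], dist[i, k] + dist[k, j])
+# A negative entry on the diagonal after the run indicates a negative cycle.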
+
+
+
+#' FloydWarshall Class
+#' @description R6 class implementing the Floyd-Warshall algorithm
+#' @details Finds shortest paths between all pairs of vertices in a weighted directed graph.
+#' Can handle:
+#' - Positive and negative edge weights
+#' - Direct path reconstruction
+#' - Cycle detection
+#' - Disconnected components (represented by Inf)
+#' @importFrom R6 R6Class
+FloydWarshall <- R6::R6Class(
+ "FloydWarshall",
+
+ public = list(
+ #' @description Initialize the algorithm with graph size
+ #' @param n_vertices Number of vertices in the graph
+ initialize = function(n_vertices) {
+ if (!is.numeric(n_vertices) || n_vertices < 1 || n_vertices != round(n_vertices)) {
+ stop("Number of vertices must be a positive integer (at least 1)")
+ }
+
+ self$n_vertices <- n_vertices
+ private$initialize_matrices()
+ invisible(self)
+ },
+
+ #' @description Add a weighted edge to the graph
+ #' @param from Source vertex (1-based indexing)
+ #' @param to Target vertex (1-based indexing)
+ #' @param weight Edge weight (can be negative)
+ add_edge = function(from, to, weight) {
+ private$validate_vertices(from, to)
+ if (!is.numeric(weight)) {
+ stop("Edge weight must be numeric")
+ }
+
+ private$dist_matrix[from, to] <- weight
+ private$pred_matrix[from, to] <- from
+ invisible(self)
+ },
+
+ #' @description Run the Floyd-Warshall algorithm
+ #' @return List containing distance matrix and presence of negative cycles
+ run = function() {
+ # Floyd-Warshall main loop
+ for (k in 1:self$n_vertices) {
+ for (i in 1:self$n_vertices) {
+ for (j in 1:self$n_vertices) {
+ if (!is.infinite(private$dist_matrix[i, k]) &&
+ !is.infinite(private$dist_matrix[k, j])) {
+ new_dist <- private$dist_matrix[i, k] + private$dist_matrix[k, j]
+ if (new_dist < private$dist_matrix[i, j]) {
+ private$dist_matrix[i, j] <- new_dist
+ private$pred_matrix[i, j] <- private$pred_matrix[k, j]
+ }
+ }
+ }
+ }
+ }
+
+ # Check for negative cycles
+ has_negative_cycle <- FALSE
+ for (i in 1:self$n_vertices) {
+ if (private$dist_matrix[i, i] < 0) {
+ has_negative_cycle <- TRUE
+ break
+ }
+ }
+
+ private$algorithm_run <- TRUE
+
+ return(list(
+ distances = private$dist_matrix,
+ has_negative_cycle = has_negative_cycle
+ ))
+ },
+
+ #' @description Get the shortest path between two vertices
+ #' @param from Source vertex
+ #' @param to Target vertex
+ #' @return List containing path and total distance
+ get_path = function(from, to) {
+ if (!private$algorithm_run) {
+ stop("Run the algorithm first using run()")
+ }
+
+ private$validate_vertices(from, to)
+
+ if (is.infinite(private$dist_matrix[from, to])) {
+ return(list(
+ path = numeric(0),
+ distance = Inf,
+ exists = FALSE
+ ))
+ }
+
+ # Reconstruct path backward from 'to' using pred[from, current], then reverse
+ path <- c()
+ current <- to
+
+ while (!is.na(current) && current != from) {
+ path <- c(current, path)
+ prev <- private$pred_matrix[from, current]
+
+ # Check for cycles
+ if (length(path) > self$n_vertices) {
+ stop("Negative cycle detected in path reconstruction")
+ }
+ current <- prev
+ }
+ if (is.na(current)) {
+ # No path exists
+ return(list(
+ path = numeric(0),
+ distance = Inf,
+ exists = FALSE
+ ))
+ }
+ path <- c(from, path)
+
+ return(list(
+ path = path,
+ distance = private$dist_matrix[from, to],
+ exists = TRUE
+ ))
+ },
+
+ #' @description Get minimum distances from a source vertex to all others
+ #' @param from Source vertex
+ #' @return Named vector of distances
+ get_distances_from = function(from) {
+ if (!private$algorithm_run) {
+ stop("Run the algorithm first using run()")
+ }
+
+ private$validate_vertices(from)
+ d <- private$dist_matrix[from, ]
+ names(d) <- as.character(seq_len(self$n_vertices))
+ return(d)
+ },
+
+ #' @description Check if the graph has a negative cycle
+ #' @return TRUE if negative cycle exists, FALSE otherwise
+ has_negative_cycle = function() {
+ if (!private$algorithm_run) {
+ stop("Run the algorithm first using run()")
+ }
+
+ for (i in 1:self$n_vertices) {
+ if (private$dist_matrix[i, i] < 0) {
+ return(TRUE)
+ }
+ }
+ return(FALSE)
+ },
+
+ #' @description Print the distance matrix
+ print_distances = function() {
+ if (!private$algorithm_run) {
+ stop("Run the algorithm first using run()")
+ }
+
+ cat("Distance Matrix:\n")
+ print(private$dist_matrix)
+ invisible(self)
+ },
+
+ # Public fields
+ n_vertices = NULL
+ ),
+
+ private = list(
+ dist_matrix = NULL,
+ pred_matrix = NULL,
+ algorithm_run = FALSE,
+
+ initialize_matrices = function() {
+ # Initialize distance matrix with Inf for non-adjacent vertices
+ private$dist_matrix <- matrix(Inf, nrow = self$n_vertices, ncol = self$n_vertices)
+ diag(private$dist_matrix) <- 0
+
+ # Initialize predecessor matrix
+ private$pred_matrix <- matrix(NA, nrow = self$n_vertices, ncol = self$n_vertices)
+ for (i in 1:self$n_vertices) {
+ private$pred_matrix[i, i] <- i
+ }
+ },
+
+ validate_vertices = function(from, to = NULL) {
+ vertices <- if (is.null(to)) from else c(from, to)
+
+ if (!all(is.numeric(vertices)) ||
+ !all(vertices == round(vertices)) ||
+ !all(vertices >= 1) ||
+ !all(vertices <= self$n_vertices)) {
+ stop("Vertex indices must be integers between 1 and ", self$n_vertices)
+ }
+ }
+ )
+)
+
+# Demonstration
+demonstrate_floyd_warshall <- function() {
+ cat("=== Floyd-Warshall Algorithm Demo ===\n\n")
+
+ # Example 1: Simple weighted graph
+ cat("Example 1: Simple weighted graph\n")
+ cat("Graph: 4 vertices with various weighted edges\n\n")
+
+ fw <- FloydWarshall$new(4)
+
+ # Add edges (with weights)
+ fw$add_edge(1, 2, 5)
+ fw$add_edge(2, 3, 3)
+ fw$add_edge(3, 4, 1)
+ fw$add_edge(1, 3, 10)
+ fw$add_edge(2, 4, 6)
+
+ # Run algorithm
+ result <- fw$run()
+
+ cat("All-pairs shortest distances:\n")
+ fw$print_distances()
+
+ # Get specific path
+ path_result <- fw$get_path(1, 4)
+ cat("\nShortest path from 1 to 4:\n")
+ cat(sprintf("Path: %s\n", paste(path_result$path, collapse = " → ")))
+ cat(sprintf("Distance: %g\n\n", path_result$distance))
+
+ # Example 2: Graph with negative weights
+ cat("Example 2: Graph with negative weights\n")
+ cat("Graph: 3 vertices with some negative edges\n\n")
+
+ fw2 <- FloydWarshall$new(3)
+ fw2$add_edge(1, 2, 4)
+ fw2$add_edge(2, 3, -2)
+ fw2$add_edge(1, 3, 5)
+
+ result2 <- fw2$run()
+
+ cat("All-pairs shortest distances:\n")
+ fw2$print_distances()
+
+ # Example 3: Negative cycle detection
+ cat("\nExample 3: Negative cycle detection\n")
+ cat("Graph: 3 vertices with a negative cycle\n\n")
+
+ fw3 <- FloydWarshall$new(3)
+ fw3$add_edge(1, 2, 1)
+ fw3$add_edge(2, 3, -5)
+ fw3$add_edge(3, 1, 2)
+
+ result3 <- fw3$run()
+
+ cat(sprintf("Contains negative cycle: %s\n\n",
+ ifelse(result3$has_negative_cycle, "Yes", "No")))
+
+ # Example 4: Disconnected components
+ cat("Example 4: Disconnected components\n")
+ cat("Graph: 4 vertices with two components\n\n")
+
+ fw4 <- FloydWarshall$new(4)
+ fw4$add_edge(1, 2, 3)
+ fw4$add_edge(3, 4, 2)
+
+ result4 <- fw4$run()
+
+ cat("All-pairs shortest distances:\n")
+ fw4$print_distances()
+
+ cat("\n=== Demo Complete ===\n")
+}
+
+# Run demonstration only if explicitly requested via environment variable
+if (identical(Sys.getenv("RUN_FLOYD_WARSHALL_DEMO"), "true")) {
+ demonstrate_floyd_warshall()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/graph_algorithms/hamiltonian_cycle.r b/Desktop/open-source/R/graph_algorithms/hamiltonian_cycle.r
new file mode 100644
index 00000000..be3a2950
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/hamiltonian_cycle.r
@@ -0,0 +1,102 @@
+# Hamiltonian Cycle Detection (Backtracking)
+#
+# The Hamiltonian Cycle problem determines whether there exists a cycle
+# in a graph that visits each vertex exactly once and returns to the start.
+# It is an NP-complete problem.
+#
+# This implementation uses backtracking to explore possible paths.
+#
+# Time Complexity: O(N!)
+# Space Complexity: O(N)
+#
+# Input: adjacency matrix (square matrix where adj[i][j] = 1 if edge exists)
+# Output: list containing:
+# - has_cycle: TRUE/FALSE
+# - cycle: list of vertices forming the Hamiltonian cycle (if found)
+#
+# Example usage at the end of this file.
+
+hamiltonian_cycle <- function(graph) {
+ num_vertices <- nrow(graph)
+ path <- rep(-1, num_vertices)
+
+ # Start at vertex 1 (can be any vertex)
+ path[1] <- 1
+
+ # Helper function to check if vertex v can be added at position pos
+ is_safe <- function(v, graph, path, pos) {
+ # Check if current vertex is adjacent to previous vertex
+ if (graph[path[pos - 1], v] == 0) return(FALSE)
+
+ # Check if vertex is already in the path
+ if (v %in% path[1:(pos - 1)]) return(FALSE)
+
+ return(TRUE)
+ }
+
+  # Recursive utility to build the Hamiltonian cycle.
+  # `path` lives in the enclosing environment and is updated with <<- so that
+  # the completed cycle is still available after the search succeeds.
+  ham_cycle_util <- function(graph, pos) {
+    if (pos == num_vertices + 1) {
+      # If last vertex connects to the first → cycle found
+      return(graph[path[num_vertices], path[1]] == 1)
+    }
+
+    # Try adding each vertex as next candidate
+    for (v in 2:num_vertices) {
+      if (is_safe(v, graph, path, pos)) {
+        path[pos] <<- v
+        if (ham_cycle_util(graph, pos + 1)) return(TRUE)
+        # Backtrack
+        path[pos] <<- -1
+      }
+    }
+    return(FALSE)
+  }
+
+  # Start backtracking from vertex 1
+  if (ham_cycle_util(graph, 2)) {
+ path <- c(path, path[1]) # complete the cycle
+ return(list(has_cycle = TRUE, cycle = path))
+ } else {
+ return(list(has_cycle = FALSE, cycle = NULL))
+ }
+}
+
+# ============================
+# Example Usage and Test
+# ============================
+
+cat("=== Hamiltonian Cycle Detection ===\n")
+
+# Example graph (Adjacency Matrix)
+# Graph:
+# 1 - 2 - 3
+# | | |
+# 4 - 5 - 6
+# This graph contains a Hamiltonian cycle
+ham_graph <- matrix(
+ c(0,1,0,1,0,0,
+ 1,0,1,1,1,0,
+ 0,1,0,0,1,1,
+ 1,1,0,0,1,0,
+ 0,1,1,1,0,1,
+ 0,0,1,0,1,0),
+ nrow = 6, byrow = TRUE
+)
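+
+# One Hamiltonian cycle in this graph (for reference): 1 -> 2 -> 3 -> 6 -> 5 -> 4 -> 1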
+
+cat("Adjacency Matrix:\n")
+print(ham_graph)
+
+cat("\nRunning Hamiltonian Cycle detection...\n")
+result <- hamiltonian_cycle(ham_graph)
+
+if (result$has_cycle) {
+ cat("Hamiltonian Cycle found:\n")
+ cat("Cycle:", paste(result$cycle, collapse = " -> "), "\n")
+} else {
+ cat("No Hamiltonian Cycle exists in this graph.\n")
+}
diff --git a/Desktop/open-source/R/graph_algorithms/hierholzer_eulerian.r b/Desktop/open-source/R/graph_algorithms/hierholzer_eulerian.r
new file mode 100644
index 00000000..923fd934
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/hierholzer_eulerian.r
@@ -0,0 +1,56 @@
+# Hierholzer's Algorithm for Eulerian Circuit
+#
+# Hierholzer's algorithm finds an Eulerian circuit in a graph, i.e., a closed path
+# that visits every edge exactly once. The graph must be connected, and all vertices
+# must have even degree (for undirected graphs) or in-degree = out-degree (for directed graphs).
+#
+# Time Complexity: O(E)
+# Space Complexity: O(V + E)
+#
+# Input: graph as an adjacency list (each entry is a vector of neighbors)
+# Output: Eulerian circuit as a vector of vertices in order
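+#
+# Note: this implementation assumes the graph is connected and every vertex has
+# even degree; these preconditions are not re-checked here.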
+
+hierholzer_eulerian <- function(graph) {
+ # Copy of the graph to modify during traversal
+ temp_graph <- lapply(graph, function(x) x)
+
+ circuit <- c()
+ stack <- c(1) # Start from vertex 1 (can choose any vertex)
+
+  while (length(stack) > 0) {
+    u <- tail(stack, 1)
+    u_key <- as.character(u)
+    if (length(temp_graph[[u_key]]) > 0) {
+      v <- temp_graph[[u_key]][1]
+      v_key <- as.character(v)
+      # Remove the edge in both directions, since the graph is undirected
+      temp_graph[[u_key]] <- temp_graph[[u_key]][-1]
+      rev_idx <- match(u, temp_graph[[v_key]])
+      if (!is.na(rev_idx)) {
+        temp_graph[[v_key]] <- temp_graph[[v_key]][-rev_idx]
+      }
+      stack <- c(stack, v)
+    } else {
+      circuit <- c(stack[length(stack)], circuit)
+      stack <- stack[-length(stack)]
+    }
+  }
+
+ return(circuit)
+}
+
+# Example usage
+cat("=== Hierholzer's Algorithm for Eulerian Circuit ===\n")
+
+# Example undirected graph with an Eulerian circuit (every vertex has even degree):
+# Edges: 1-2, 2-3, 3-1, 3-4, 4-5, 5-3
+hierholzer_graph <- list(
+  "1" = c(2,3),
+  "2" = c(1,3),
+  "3" = c(1,2,4,5),
+  "4" = c(3,5),
+  "5" = c(3,4)
+)
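+
+# Expected Eulerian circuit starting at vertex 1 (for reference):
+#   1 -> 2 -> 3 -> 4 -> 5 -> 3 -> 1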
+
+cat("Graph (adjacency list):\n")
+for (v in names(hierholzer_graph)) {
+ edges <- hierholzer_graph[[v]]
+ cat("Vertex", v, "-> [", paste(edges, collapse = ", "), "]\n")
+}
+
+cat("\nFinding Eulerian circuit:\n")
+euler_circuit <- hierholzer_eulerian(hierholzer_graph)
+cat("Eulerian circuit:", paste(euler_circuit, collapse = " -> "), "\n")
+
diff --git a/Desktop/open-source/R/graph_algorithms/hopcroft_karp_max_matching.r b/Desktop/open-source/R/graph_algorithms/hopcroft_karp_max_matching.r
new file mode 100644
index 00000000..d89e874c
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/hopcroft_karp_max_matching.r
@@ -0,0 +1,101 @@
+# Hopcroft-Karp Algorithm for Maximum Bipartite Matching
+#
+# The Hopcroft-Karp algorithm finds the maximum matching in a bipartite graph.
+# It alternates between BFS to find shortest augmenting paths and DFS to augment them.
+#
+# Time Complexity: O(E * sqrt(V))
+# Space Complexity: O(V + E)
+#
+# Input: bipartite graph as adjacency list (from left partition to right partition)
+# Output: Matching as a list where match_left[i] = matched vertex on right (or 0 if unmatched)
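+#
+# Each phase (for reference): one BFS from all unmatched left vertices builds
+# layered distances, then DFS augments along vertex-disjoint shortest
+# augmenting paths found within that layering.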
+
+hopcroft_karp <- function(graph, num_left, num_right) {
+ INF <- 1e9
+ match_left <- rep(0, num_left)
+ match_right <- rep(0, num_right)
+ dist <- rep(0, num_left)
+
+ bfs <- function() {
+ queue <- c()
+ for (u in 1:num_left) {
+ if (match_left[u] == 0) {
+ dist[u] <<- 0
+ queue <- c(queue, u)
+ } else {
+ dist[u] <<- INF
+ }
+ }
+ found <- FALSE
+ while (length(queue) > 0) {
+ u <- queue[1]; queue <- queue[-1]
+ for (v in graph[[as.character(u)]]) {
+ mv <- match_right[v]
+ if (mv == 0) {
+ found <- TRUE
+ } else if (dist[mv] == INF) {
+ dist[mv] <<- dist[u] + 1
+ queue <- c(queue, mv)
+ }
+ }
+ }
+ return(found)
+ }
+
+ dfs <- function(u) {
+ for (v in graph[[as.character(u)]]) {
+ mv <- match_right[v]
+ if (mv == 0 || (dist[mv] == dist[u] + 1 && dfs(mv))) {
+ match_left[u] <<- v
+ match_right[v] <<- u
+ return(TRUE)
+ }
+ }
+ dist[u] <<- INF
+ return(FALSE)
+ }
+
+ matching <- 0
+ while (bfs()) {
+ for (u in 1:num_left) {
+ if (match_left[u] == 0 && dfs(u)) {
+ matching <- matching + 1
+ }
+ }
+ }
+
+ return(list(match_left = match_left, match_right = match_right, max_matching = matching))
+}
+
+# Example usage
+cat("=== Hopcroft-Karp Maximum Bipartite Matching ===\n")
+
+# Bipartite graph: left partition 1..4, right partition 1..4
+# Graph edges: 1->1,1->2, 2->1, 3->2,3->3, 4->3,4->4
+bipartite_graph <- list(
+ "1" = c(1,2),
+ "2" = c(1),
+ "3" = c(2,3),
+ "4" = c(3,4)
+)
+
+num_left <- 4
+num_right <- 4
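+
+# Expected maximum matching size for this example (for reference): 4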
+
+cat("Graph (adjacency list from left partition):\n")
+for (u in names(bipartite_graph)) {
+ edges <- bipartite_graph[[u]]
+ cat("Vertex", u, "-> [", paste(edges, collapse = ", "), "]\n")
+}
+
+cat("\nRunning Hopcroft-Karp algorithm:\n")
+hk_result <- hopcroft_karp(bipartite_graph, num_left, num_right)
+cat("Maximum Matching Size:", hk_result$max_matching, "\n")
+
+cat("Matching from left partition:\n")
+for (i in 1:num_left) {
+ if (hk_result$match_left[i] != 0) {
+ cat("Left vertex", i, "-> Right vertex", hk_result$match_left[i], "\n")
+ } else {
+ cat("Left vertex", i, "is unmatched\n")
+ }
+}
diff --git a/Desktop/open-source/R/graph_algorithms/kosaraju_scc.r b/Desktop/open-source/R/graph_algorithms/kosaraju_scc.r
new file mode 100644
index 00000000..a10c5aa6
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/kosaraju_scc.r
@@ -0,0 +1,104 @@
+# Kosaraju's Strongly Connected Components (SCC) Algorithm
+#
+# Kosaraju's algorithm finds all strongly connected components in a directed graph.
+# It performs two depth-first searches (DFS):
+# 1. DFS on the original graph to compute finishing times of vertices.
+# 2. DFS on the transposed graph in the order of decreasing finishing times.
+#
+# Time Complexity: O(V + E)
+# Space Complexity: O(V + E)
+#
+# Input: graph as an adjacency list where each entry is a list of vertices it points to
+# Output: A list of SCCs (each SCC is a vector of vertices)
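+#
+# Key fact (for reference): a graph and its transpose have exactly the same SCCs,
+# and running DFS on the transpose in decreasing order of finish times keeps each
+# DFS tree inside a single SCC.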
+
+kosaraju_scc <- function(graph) {
+
+ num_vertices <- max(as.numeric(names(graph)))
+ visited <- rep(FALSE, num_vertices)
+ finish_stack <- c()
+
+ # DFS to compute finishing times
+ dfs1 <- function(u) {
+ visited[u] <<- TRUE
+ for (v in graph[[as.character(u)]]) {
+ if (!visited[v]) dfs1(v)
+ }
+ finish_stack <<- c(u, finish_stack) # push to stack
+ }
+
+ # Step 1: Run DFS on all vertices
+ for (u in 1:num_vertices) {
+ if (!visited[u]) dfs1(u)
+ }
+
+  # Step 2: Transpose the graph (entries stay NULL for vertices with no incoming edges)
+  transposed <- vector("list", num_vertices)
+
+ for (u_char in names(graph)) {
+ u <- as.numeric(u_char)
+ for (v in graph[[u_char]]) {
+ transposed[[v]] <- c(transposed[[v]], u)
+ }
+ }
+
+ # Step 3: DFS on transposed graph in order of decreasing finish time
+ visited <- rep(FALSE, num_vertices)
+ scc_list <- list()
+
+  dfs2 <- function(u, current_scc) {
+    visited[u] <<- TRUE
+    # Accumulate the component locally and return it; only `visited` needs <<-
+    current_scc <- c(current_scc, u)
+    for (v in transposed[[u]]) {
+      if (!visited[v]) current_scc <- dfs2(v, current_scc)
+    }
+    return(current_scc)
+  }
+
+ for (u in finish_stack) {
+ if (!visited[u]) {
+ scc <- dfs2(u, c())
+ scc_list <- append(scc_list, list(scc))
+ }
+ }
+
+ return(scc_list)
+}
+
+# Example usage
+cat("=== Kosaraju's Strongly Connected Components (SCC) Algorithm ===\n")
+
+# Example directed graph
+# Graph structure:
+# 1 -> 2
+# 2 -> 3
+# 3 -> 1, 4
+# 4 -> 5
+# 5 -> 6
+# 6 -> 4, 7
+# 7 -> (none)
+kosaraju_graph <- list(
+ "1" = c(2),
+ "2" = c(3),
+ "3" = c(1, 4),
+ "4" = c(5),
+ "5" = c(6),
+ "6" = c(4, 7),
+ "7" = c()
+)
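+
+# Expected SCCs for this graph (for reference): {1, 2, 3}, {4, 5, 6}, {7}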
+
+cat("Graph (adjacency list):\n")
+for (v in names(kosaraju_graph)) {
+ edges <- kosaraju_graph[[v]]
+ if (length(edges) > 0) {
+ cat("Vertex", v, "-> [", paste(edges, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", v, "-> []\n")
+ }
+}
+
+cat("\nFinding strongly connected components:\n")
+scc_result <- kosaraju_scc(kosaraju_graph)
+for (i in 1:length(scc_result)) {
+ cat("SCC", i, ":", paste(scc_result[[i]], collapse = ", "), "\n")
+}
diff --git a/Desktop/open-source/R/graph_algorithms/kruskal_mst.r b/Desktop/open-source/R/graph_algorithms/kruskal_mst.r
new file mode 100644
index 00000000..705f51d2
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/kruskal_mst.r
@@ -0,0 +1,124 @@
+# Kruskal's Minimum Spanning Tree (MST) Algorithm
+#
+# Kruskal's algorithm finds a subset of edges that connects all vertices in a graph
+# without any cycles and with the minimum possible total edge weight.
+#
+# It uses the Disjoint Set Union (DSU) or Union-Find data structure to detect cycles efficiently.
+#
+# Time Complexity: O(E log E) — dominated by sorting edges
+# Space Complexity: O(V)
+#
+# Input: graph as a list of edges (each edge has `u`, `v`, and `weight`)
+# Output: List containing MST edges and total minimum cost
+
+# -------------------------
+# Helper: Disjoint Set (Union-Find)
+# -------------------------
+
+make_set <- function(n) {
+ parent <- 1:n
+ rank <- rep(0, n)
+ return(list(parent = parent, rank = rank))
+}
+
+find_set <- function(parent, v) {
+  if (parent[v] != v) {
+    # Path compression; note that R copies `parent` on modification, so the
+    # compression is local to this call, but the returned root is still correct.
+    parent[v] <- find_set(parent, parent[v])
+  }
+  return(parent[v])
+}
+
+union_sets <- function(parent, rank, a, b) {
+ a_root <- find_set(parent, a)
+ b_root <- find_set(parent, b)
+
+ if (a_root != b_root) {
+ if (rank[a_root] < rank[b_root]) {
+ temp <- a_root
+ a_root <- b_root
+ b_root <- temp
+ }
+ parent[b_root] <- a_root
+ if (rank[a_root] == rank[b_root]) {
+ rank[a_root] <- rank[a_root] + 1
+ }
+ }
+ return(list(parent = parent, rank = rank))
+}
+
+# -------------------------
+# Kruskal's MST Algorithm
+# -------------------------
+
+kruskal_mst <- function(edges, num_vertices) {
+ # Sort edges by weight
+ edges <- edges[order(sapply(edges, function(e) e$weight))]
+
+ # Initialize disjoint sets
+ dsu <- make_set(num_vertices)
+ parent <- dsu$parent
+ rank <- dsu$rank
+
+ mst_edges <- list()
+ total_cost <- 0
+
+ # Process each edge
+ for (edge in edges) {
+ u <- edge$u
+ v <- edge$v
+ w <- edge$weight
+
+ u_root <- find_set(parent, u)
+ v_root <- find_set(parent, v)
+
+ # If u and v are in different sets, include this edge
+ if (u_root != v_root) {
+ mst_edges <- append(mst_edges, list(edge))
+ total_cost <- total_cost + w
+ merged <- union_sets(parent, rank, u_root, v_root)
+ parent <- merged$parent
+ rank <- merged$rank
+ }
+ }
+
+ return(list(
+ mst_edges = mst_edges,
+ total_cost = total_cost
+ ))
+}
+
+# -------------------------
+# Example usage and testing
+# -------------------------
+
+cat("=== Kruskal's Minimum Spanning Tree Algorithm ===\n")
+
+# Example undirected weighted graph (edges list)
+# Graph:
+# 1 --(4)-- 2
+# 1 --(3)-- 3
+# 2 --(1)-- 3
+# 2 --(2)-- 4
+# 3 --(5)-- 4
+edges <- list(
+ list(u = 1, v = 2, weight = 4),
+ list(u = 1, v = 3, weight = 3),
+ list(u = 2, v = 3, weight = 1),
+ list(u = 2, v = 4, weight = 2),
+ list(u = 3, v = 4, weight = 5)
+)
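+
+# Expected MST for this graph (for reference): edges 2--3 (1), 2--4 (2), 1--3 (3),
+# giving total cost 6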
+
+cat("Graph edges:\n")
+for (e in edges) {
+ cat(paste0("(", e$u, " -- ", e$v, ") weight = ", e$weight, "\n"))
+}
+
+cat("\nRunning Kruskal's MST:\n")
+result <- kruskal_mst(edges, num_vertices = 4)
+
+cat("MST Edges:\n")
+for (e in result$mst_edges) {
+ cat(paste0("(", e$u, " -- ", e$v, ") weight = ", e$weight, "\n"))
+}
+
+cat("Total Minimum Cost:", result$total_cost, "\n")
diff --git a/Desktop/open-source/R/graph_algorithms/page_rank.r b/Desktop/open-source/R/graph_algorithms/page_rank.r
new file mode 100644
index 00000000..825fd8c1
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/page_rank.r
@@ -0,0 +1,70 @@
+# PageRank Algorithm
+#
+# PageRank computes the importance of each vertex (web page) in a directed graph
+# based on the link structure. Pages linked by important pages get higher rank.
+#
+# Time Complexity: O(V^2) for dense graphs (can be O(E) for sparse graphs using adjacency lists)
+# Space Complexity: O(V)
+#
+# Input: graph as an adjacency list where each entry is a vector of outgoing links
+# damping_factor (default 0.85), max_iterations, tolerance for convergence
+# Output: PageRank vector with scores for each vertex
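+#
+# Update rule (standard formulation, for reference):
+#   PR(v) = (1 - d) / N + d * sum over incoming edges (u, v) of PR(u) / out_degree(u)
+# where d is the damping factor and N the number of vertices; rank mass from
+# dangling vertices (no outgoing edges) is spread uniformly over all vertices.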
+
+page_rank <- function(graph, damping_factor=0.85, max_iterations=100, tol=1e-6) {
+ num_vertices <- max(as.numeric(names(graph)))
+ pr <- rep(1/num_vertices, num_vertices)
+
+ out_degree <- sapply(graph, length)
+
+ for (iter in 1:max_iterations) {
+ pr_new <- rep((1 - damping_factor)/num_vertices, num_vertices)
+ for (u_char in names(graph)) {
+ u <- as.numeric(u_char)
+ neighbors <- graph[[u_char]]
+ if (length(neighbors) > 0) {
+ for (v in neighbors) {
+        pr_new[v] <- pr_new[v] + damping_factor * pr[u] / out_degree[u_char]
+ }
+ } else {
+ # Handle dangling nodes (no outgoing edges)
+ pr_new <- pr_new + damping_factor * pr[u] / num_vertices
+ }
+ }
+    # Check convergence, keeping the most recent iterate as the result
+    converged <- sum(abs(pr_new - pr)) < tol
+    pr <- pr_new
+    if (converged) break
+ }
+
+ return(pr)
+}
+
+# Example usage
+cat("=== PageRank Algorithm ===\n")
+
+# Example directed graph (adjacency list)
+# Graph structure:
+# 1 -> 2,3
+# 2 -> 3
+# 3 -> 1
+# 4 -> 3
+pr_graph <- list(
+ "1" = c(2,3),
+ "2" = c(3),
+ "3" = c(1),
+ "4" = c(3)
+)
+
+cat("Graph (adjacency list):\n")
+for (v in names(pr_graph)) {
+ edges <- pr_graph[[v]]
+ if (length(edges) > 0) {
+ cat("Vertex", v, "-> [", paste(edges, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", v, "-> []\n")
+ }
+}
+
+cat("\nComputing PageRank:\n")
+pr_result <- page_rank(pr_graph)
+for (i in 1:length(pr_result)) {
+ cat("Vertex", i, ": PageRank =", round(pr_result[i], 4), "\n")
+}
diff --git a/Desktop/open-source/R/graph_algorithms/prim_mst.r b/Desktop/open-source/R/graph_algorithms/prim_mst.r
new file mode 100644
index 00000000..62f628d2
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/prim_mst.r
@@ -0,0 +1,88 @@
+# Prim's Minimum Spanning Tree Algorithm
+#
+# Prim's algorithm finds the Minimum Spanning Tree (MST) of a connected, undirected, weighted graph.
+# It starts from a source vertex and grows the MST by adding the smallest-weight edge connecting
+# the MST to a new vertex.
+#
+# Time Complexity: O(V^2) with adjacency matrix, can be O(E log V) with priority queue
+# Space Complexity: O(V)
+#
+# Input: graph as an adjacency list where each entry is a list of edges with fields
+# `vertex` and `weight`, and `source` vertex index (integer)
+# Output: A list containing MST edges, total weight
+
+prim_mst <- function(graph, source = 1) {
+ all_vertices <- unique(c(names(graph), unlist(lapply(graph, function(x) sapply(x, function(e) e$vertex)))))
+ all_vertices <- as.numeric(all_vertices)
+ num_vertices <- max(all_vertices)
+
+ in_mst <- rep(FALSE, num_vertices)
+ key <- rep(Inf, num_vertices)
+ parent <- rep(-1, num_vertices)
+
+ key[source] <- 0
+
+ for (i in 1:num_vertices) {
+ # Pick the minimum key vertex not in MST
+ u <- which.min(ifelse(in_mst, Inf, key))
+
+ in_mst[u] <- TRUE
+
+ # Update keys and parents for adjacent vertices
+ for (edge in graph[[as.character(u)]]) {
+ v <- edge$vertex
+ w <- edge$weight
+ if (!in_mst[v] && w < key[v]) {
+ key[v] <- w
+ parent[v] <- u
+ }
+ }
+ }
+
+ # Build MST edges
+ mst_edges <- list()
+ total_weight <- 0
+ for (v in 1:num_vertices) {
+ if (parent[v] != -1) {
+ mst_edges <- append(mst_edges, list(list(from = parent[v], to = v, weight = key[v])))
+ total_weight <- total_weight + key[v]
+ }
+ }
+
+ return(list(
+ edges = mst_edges,
+ total_weight = total_weight
+ ))
+}
+
+# Example usage
+cat("=== Prim's Minimum Spanning Tree Algorithm ===\n")
+
+# Example undirected weighted graph
+# Graph structure (edge weights in parentheses):
+# 1 --(2)-- 2
+# 1 --(3)-- 3
+# 2 --(1)-- 3
+# 2 --(4)-- 4
+# 3 --(5)-- 4
+prim_graph <- list(
+ "1" = list(list(vertex = 2, weight = 2), list(vertex = 3, weight = 3)),
+ "2" = list(list(vertex = 1, weight = 2), list(vertex = 3, weight = 1), list(vertex = 4, weight = 4)),
+ "3" = list(list(vertex = 1, weight = 3), list(vertex = 2, weight = 1), list(vertex = 4, weight = 5)),
+ "4" = list(list(vertex = 2, weight = 4), list(vertex = 3, weight = 5))
+)
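+
+# Expected MST from vertex 1 for this graph (for reference):
+#   1--2 (2), 2--3 (1), 2--4 (4), total weight 7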
+
+cat("Graph (adjacency list):\n")
+for (v in names(prim_graph)) {
+ edges <- prim_graph[[v]]
+ edge_strs <- sapply(edges, function(e) paste0(e$vertex, "(", e$weight, ")"))
+ cat("Vertex", v, "-> [", paste(edge_strs, collapse = ", "), "]\n")
+}
+
+cat("\nRunning Prim's MST from vertex 1:\n")
+mst_result <- prim_mst(prim_graph, 1)
+cat("MST edges:\n")
+for (edge in mst_result$edges) {
+ cat(edge$from, "--", edge$weight, "-->", edge$to, "\n")
+}
+cat("Total weight of MST:", mst_result$total_weight, "\n")
diff --git a/Desktop/open-source/R/graph_algorithms/stoer_wagner_min_cut.r b/Desktop/open-source/R/graph_algorithms/stoer_wagner_min_cut.r
new file mode 100644
index 00000000..09224565
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/stoer_wagner_min_cut.r
@@ -0,0 +1,70 @@
+# Stoer-Wagner Minimum Cut Algorithm
+#
+# Computes the global minimum cut of an undirected weighted graph.
+# The minimum cut is a partition of vertices into two sets minimizing
+# the sum of weights of edges crossing the partition.
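+#
+# Assumptions of this implementation: the graph is connected, edge weights are
+# non-negative, and the input is a list of adjacency-matrix rows (one numeric
+# vector per vertex).
+#
+# Time Complexity: O(V^3) for this simple array-based version
+# Space Complexity: O(V^2)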
+
+stoer_wagner_min_cut <- function(graph) {
+ num_vertices <- max(as.numeric(names(graph)))
+ vertices <- 1:num_vertices
+ min_cut_value <- Inf
+ best_cut <- NULL
+
+ g <- graph # copy of graph to modify
+
+ while(length(vertices) > 1) {
+ used <- rep(FALSE, num_vertices)
+ weights <- rep(0, num_vertices)
+ prev <- NULL
+
+    for (i in 1:length(vertices)) {
+      # Pick the most tightly connected vertex among active, not-yet-added ones
+      sel <- vertices[which.max(ifelse(used[vertices], -Inf, weights[vertices]))]
+ used[sel] <- TRUE
+ if (i == length(vertices)) {
+ # last vertex added
+ if (weights[sel] < min_cut_value) {
+ min_cut_value <- weights[sel]
+ best_cut <- c(sel)
+ }
+ # merge sel and prev
+ if (!is.null(prev)) {
+ for (v in 1:num_vertices) {
+ g[[prev]][v] <- g[[prev]][v] + g[[sel]][v]
+ g[[v]][prev] <- g[[prev]][v]
+ }
+ vertices <- vertices[vertices != sel]
+ }
+ } else {
+ # update weights
+ for (v in vertices) {
+ if (!used[v]) weights[v] <- weights[v] + g[[sel]][v]
+ }
+ prev <- sel
+ }
+ }
+ }
+
+ return(list(min_cut_value = min_cut_value, cut_vertex = best_cut))
+}
+
+# Example usage
+cat("=== Stoer-Wagner Minimum Cut Algorithm ===\n")
+
+# Example undirected weighted graph as adjacency matrix
+# 1-2(3), 1-3(1), 2-3(3), 2-4(4), 3-4(5)
+graph_matrix <- list(
+ "1" = c(0,3,1,0),
+ "2" = c(3,0,3,4),
+ "3" = c(1,3,0,5),
+ "4" = c(0,4,5,0)
+)
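+
+# Expected result for this graph (for reference): minimum cut value 4,
+# separating vertex {1} from {2, 3, 4}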
+
+cat("Graph (adjacency matrix):\n")
+for (i in names(graph_matrix)) {
+ cat("Vertex", i, "-> [", paste(graph_matrix[[i]], collapse = ", "), "]\n")
+}
+
+cat("\nComputing global minimum cut:\n")
+min_cut_result <- stoer_wagner_min_cut(graph_matrix)
+cat("Minimum cut value:", min_cut_result$min_cut_value, "\n")
+cat("Cut vertex (representative):", min_cut_result$cut_vertex, "\n")
diff --git a/Desktop/open-source/R/graph_algorithms/topological_sort.r b/Desktop/open-source/R/graph_algorithms/topological_sort.r
new file mode 100644
index 00000000..1c27b137
--- /dev/null
+++ b/Desktop/open-source/R/graph_algorithms/topological_sort.r
@@ -0,0 +1,90 @@
+# Topological Sort Algorithm (Kahn's Algorithm)
+#
+# Topological sorting of a Directed Acyclic Graph (DAG) is a linear ordering
+# of vertices such that for every directed edge u -> v, vertex u comes before v.
+# It is not possible if the graph has cycles.
+#
+# Time Complexity: O(V + E)
+# Space Complexity: O(V + E)
+#
+# Input: graph as an adjacency list where each entry is a list of vertices it points to
+# Output: A list containing the topological order and a flag indicating if a cycle exists
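+#
+# Kahn's algorithm repeatedly removes a vertex with in-degree 0; if the queue
+# empties before every vertex is output, the remaining vertices all lie on at
+# least one directed cycle.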
+
+topological_sort <- function(graph) {
+ all_vertices <- unique(c(names(graph), unlist(graph)))
+ all_vertices <- as.numeric(all_vertices)
+
+ num_vertices <- max(all_vertices)
+
+ # Compute in-degree of each vertex
+ in_degree <- rep(0, num_vertices)
+ for (u_char in names(graph)) {
+ u <- as.numeric(u_char)
+ for (v in graph[[u_char]]) {
+ in_degree[v] <- in_degree[v] + 1
+ }
+ }
+
+ # Initialize queue with vertices having in-degree 0
+ queue <- which(in_degree == 0)
+ topo_order <- c()
+
+ while (length(queue) > 0) {
+ u <- queue[1]
+ queue <- queue[-1]
+ topo_order <- c(topo_order, u)
+
+ if (!is.null(graph[[as.character(u)]])) {
+ for (v in graph[[as.character(u)]]) {
+ in_degree[v] <- in_degree[v] - 1
+ if (in_degree[v] == 0) {
+ queue <- c(queue, v)
+ }
+ }
+ }
+ }
+
+ # Check for cycles
+ has_cycle <- length(topo_order) != num_vertices
+
+ return(list(
+ topo_order = if (!has_cycle) topo_order else NULL,
+ has_cycle = has_cycle
+ ))
+}
+
+# Example usage
+cat("=== Topological Sort (Kahn's Algorithm) ===\n")
+
+# Example DAG
+# Graph structure:
+# 1 -> 2, 1 -> 3
+# 2 -> 4
+# 3 -> 4
+# 4 -> 5
+topo_graph <- list(
+ "1" = c(2, 3),
+ "2" = c(4),
+ "3" = c(4),
+ "4" = c(5),
+ "5" = c()
+)
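+
+# Expected topological order from this implementation (for reference): 1 -> 2 -> 3 -> 4 -> 5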
+
+cat("Graph (adjacency list):\n")
+for (v in names(topo_graph)) {
+ edges <- topo_graph[[v]]
+ if (length(edges) > 0) {
+ cat("Vertex", v, "-> [", paste(edges, collapse = ", "), "]\n")
+ } else {
+ cat("Vertex", v, "-> []\n")
+ }
+}
+
+cat("\nRunning Topological Sort:\n")
+ts_result <- topological_sort(topo_graph)
+
+if (!ts_result$has_cycle) {
+ cat("Topological Order:", paste(ts_result$topo_order, collapse = " -> "), "\n")
+} else {
+ cat("Cycle detected! Topological sort not possible.\n")
+}
diff --git a/Desktop/open-source/R/machine_learning/.gitignore b/Desktop/open-source/R/machine_learning/.gitignore
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/Desktop/open-source/R/machine_learning/.gitignore
@@ -0,0 +1 @@
+
diff --git a/Desktop/open-source/R/machine_learning/README.md b/Desktop/open-source/R/machine_learning/README.md
new file mode 100644
index 00000000..86fe2aaf
--- /dev/null
+++ b/Desktop/open-source/R/machine_learning/README.md
@@ -0,0 +1,8 @@
+# Machine Learning with R
+
+## Tutorials
+
+- [Introduction to machine learning in R (tutorial) --- from Kaggle](https://www.kaggle.com/camnugent/introduction-to-machine-learning-in-r-tutorial)
+- [An Introduction to Machine Learning with R](https://lgatto.github.io/IntroMachineLearningWithR/)
+- [Machine Learning in R for beginners](https://www.datacamp.com/community/tutorials/machine-learning-in-r)
+- [Machine Learning in R: mlr-tutorial](https://www.notion.so/mlr-Tutorial-b71444fe979c4a8cafe91e10e7f81d79)
diff --git a/Desktop/open-source/R/machine_learning/gradient_boosting.r b/Desktop/open-source/R/machine_learning/gradient_boosting.r
new file mode 100644
index 00000000..a07d68bb
--- /dev/null
+++ b/Desktop/open-source/R/machine_learning/gradient_boosting.r
@@ -0,0 +1,435 @@
+# gradient_boosting.r
+# Gradient Boosting Algorithm Implementation in R
+# A sequential ensemble learning method that builds models iteratively
+# Each new model corrects errors made by previous models
+#
+# Algorithm details:
+# - Uses decision trees as weak learners
+# - Fits each tree to the residuals (errors) of previous predictions
+# - Combines predictions using weighted sum
+# - Learning rate controls contribution of each tree
+# - Time complexity: O(n_trees * n_samples * log(n_samples))
+# - Space complexity: O(n_trees * tree_size)
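+#
+# Core update (squared-error loss, for reference): with residuals
+#   r_i = y_i - F_{m-1}(x_i),
+# fit tree h_m to the residuals and set
+#   F_m(x) = F_{m-1}(x) + learning_rate * h_m(x)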
+
+library(R6)
+
+#' Decision Tree Node
+#' Simple decision tree implementation for gradient boosting
+DecisionTreeNode <- R6Class(
+ "DecisionTreeNode",
+
+ public = list(
+ feature = NULL,
+ threshold = NULL,
+ left = NULL,
+ right = NULL,
+ value = NULL,
+
+ initialize = function(feature = NULL, threshold = NULL,
+ left = NULL, right = NULL, value = NULL) {
+ self$feature <- feature
+ self$threshold <- threshold
+ self$left <- left
+ self$right <- right
+ self$value <- value
+ },
+
+ is_leaf = function() {
+ return(!is.null(self$value))
+ }
+ )
+)
+
+#' Regression Tree for Gradient Boosting
+RegressionTree <- R6Class(
+ "RegressionTree",
+
+ public = list(
+ max_depth = NULL,
+ min_samples_split = NULL,
+ root = NULL,
+
+ initialize = function(max_depth = 3, min_samples_split = 2) {
+ self$max_depth <- max_depth
+ self$min_samples_split <- min_samples_split
+ self$root <- NULL
+ },
+
+ fit = function(X, y) {
+ "Build the decision tree"
+ self$root <- private$build_tree(X, y, depth = 0)
+ invisible(self)
+ },
+
+ predict = function(X) {
+ "Predict values for input data"
+ if (is.vector(X)) {
+ X <- matrix(X, nrow = 1)
+ }
+ apply(X, 1, function(row) private$predict_sample(row, self$root))
+ }
+ ),
+
+ private = list(
+ build_tree = function(X, y, depth) {
+ "Recursively build decision tree"
+ n_samples <- nrow(X)
+
+ # Stopping criteria
+ if (depth >= self$max_depth ||
+ n_samples < self$min_samples_split ||
+ length(unique(y)) == 1) {
+ return(DecisionTreeNode$new(value = mean(y)))
+ }
+
+ # Find best split
+ best_split <- private$find_best_split(X, y)
+
+ if (is.null(best_split)) {
+ return(DecisionTreeNode$new(value = mean(y)))
+ }
+
+ # Split data
+ left_idx <- X[, best_split$feature] <= best_split$threshold
+ right_idx <- !left_idx
+
+ # Build subtrees
+ left_subtree <- private$build_tree(
+ X[left_idx, , drop = FALSE],
+ y[left_idx],
+ depth + 1
+ )
+ right_subtree <- private$build_tree(
+ X[right_idx, , drop = FALSE],
+ y[right_idx],
+ depth + 1
+ )
+
+ return(DecisionTreeNode$new(
+ feature = best_split$feature,
+ threshold = best_split$threshold,
+ left = left_subtree,
+ right = right_subtree
+ ))
+ },
+
+ find_best_split = function(X, y) {
+ "Find the best feature and threshold to split on"
+ best_mse <- Inf
+ best_split <- NULL
+ n_features <- ncol(X)
+ n_samples <- nrow(X)
+ min_samples_leaf <- max(1, floor(self$min_samples_split / 2))
+
+ for (feature in 1:n_features) {
+ # Sort feature values and corresponding targets
+ sorted_idx <- order(X[, feature])
+ sorted_x <- X[sorted_idx, feature]
+ sorted_y <- y[sorted_idx]
+
+ # Consider only unique values as potential thresholds
+ unique_vals <- unique(sorted_x)
+ if (length(unique_vals) <= 1) next
+
+ # Pre-compute cumulative statistics
+ cum_sum <- cumsum(sorted_y)
+ cum_sum_sq <- cumsum(sorted_y^2)
+
+ # Evaluate splits between unique values
+ for (i in 1:(length(unique_vals)-1)) {
+ threshold <- (unique_vals[i] + unique_vals[i+1]) / 2
+ split_idx <- which(sorted_x <= threshold)
+ n_left <- length(split_idx)
+ n_right <- n_samples - n_left
+
+ # Skip if split doesn't meet minimum samples requirement
+ if (n_left < min_samples_leaf || n_right < min_samples_leaf) next
+
+ # Calculate MSE using pre-computed statistics
+ left_sum <- cum_sum[n_left]
+ left_sum_sq <- cum_sum_sq[n_left]
+ right_sum <- cum_sum[n_samples] - left_sum
+ right_sum_sq <- cum_sum_sq[n_samples] - left_sum_sq
+
+ left_mse <- (left_sum_sq - (left_sum^2)/n_left)
+ right_mse <- (right_sum_sq - (right_sum^2)/n_right)
+ mse <- left_mse + right_mse
+
+ if (mse < best_mse) {
+ best_mse <- mse
+ best_split <- list(feature = feature, threshold = threshold)
+ }
+ }
+ }
+
+ return(best_split)
+ },
+
+ predict_sample = function(x, node) {
+ "Predict single sample by traversing tree"
+ if (node$is_leaf()) {
+ return(node$value)
+ }
+
+ if (x[node$feature] <= node$threshold) {
+ return(private$predict_sample(x, node$left))
+ } else {
+ return(private$predict_sample(x, node$right))
+ }
+ }
+ )
+)
+
+#' Gradient Boosting Regressor
+GradientBoostingRegressor <- R6Class(
+ "GradientBoostingRegressor",
+
+ public = list(
+ n_estimators = NULL,
+ learning_rate = NULL,
+ max_depth = NULL,
+ min_samples_split = NULL,
+ trees = NULL,
+    initial_prediction = NULL,
+    n_features = NULL,
+ early_stopping_rounds = NULL,
+ best_iteration = NULL,
+
+ initialize = function(n_estimators = 100, learning_rate = 0.1,
+ max_depth = 3, min_samples_split = 2,
+ early_stopping_rounds = NULL) {
+ "Initialize gradient boosting parameters"
+ if (n_estimators <= 0 || learning_rate <= 0 || max_depth <= 0) {
+ stop("Parameters must be positive")
+ }
+
+ self$n_estimators <- n_estimators
+ self$learning_rate <- learning_rate
+ self$max_depth <- max_depth
+ self$min_samples_split <- min_samples_split
+ self$trees <- list()
+      self$initial_prediction <- NULL
+      self$early_stopping_rounds <- early_stopping_rounds
+ },
+
+ fit = function(X, y, verbose = FALSE) {
+ "Train the gradient boosting model"
+ # Input validation
+ if (!is.numeric(y)) {
+ stop("Target variable 'y' must be numeric")
+ }
+ if (is.vector(X)) {
+ X <- matrix(X, ncol = 1)
+ }
+ if (!is.matrix(X) || !is.numeric(X)) {
+ stop("Input 'X' must be a numeric matrix or vector")
+ }
+ if (length(y) != nrow(X)) {
+ stop("Number of samples in X and y must match")
+ }
+ if (any(is.na(X)) || any(is.na(y))) {
+ stop("Input contains missing values")
+ }
+
+      # Record the number of features for later use (e.g. feature importance)
+      self$n_features <- ncol(X)
+
+      # Initialize with mean of target
+      self$initial_prediction <- mean(y)
+ predictions <- rep(self$initial_prediction, length(y))
+
+ # Build trees sequentially
+ for (i in 1:self$n_estimators) {
+ # Calculate residuals (negative gradient for MSE loss)
+ residuals <- y - predictions
+
+ # Fit tree to residuals
+ tree <- RegressionTree$new(
+ max_depth = self$max_depth,
+ min_samples_split = self$min_samples_split
+ )
+ tree$fit(X, residuals)
+
+ # Update predictions
+ tree_predictions <- tree$predict(X)
+ predictions <- predictions + self$learning_rate * tree_predictions
+
+ # Store tree
+ self$trees[[i]] <- tree
+
+ # Calculate training error
+ if (verbose && (i %% 10 == 0 || i == 1)) {
+ mse <- mean((y - predictions)^2)
+ rmse <- sqrt(mse)
+ cat(sprintf("Iteration %d/%d - RMSE: %.4f\n",
+ i, self$n_estimators, rmse))
+ }
+ }
+
+ invisible(self)
+ },
+
+ predict = function(X) {
+ "Make predictions using the trained model"
+ if (is.null(self$initial_prediction)) {
+ stop("Model has not been fitted yet")
+ }
+
+ if (is.vector(X)) {
+ X <- matrix(X, ncol = 1)
+ }
+
+ # Start with initial prediction
+ predictions <- rep(self$initial_prediction, nrow(X))
+
+ # Add contribution from each tree
+ for (tree in self$trees) {
+ predictions <- predictions + self$learning_rate * tree$predict(X)
+ }
+
+ return(predictions)
+ },
+
+ score = function(X, y) {
+ "Calculate R-squared score"
+ predictions <- self$predict(X)
+ ss_res <- sum((y - predictions)^2)
+ ss_tot <- sum((y - mean(y))^2)
+ r2 <- 1 - (ss_res / ss_tot)
+ return(r2)
+ },
+
+ get_feature_importance = function() {
+ "Calculate relative feature importance based on split frequency and gain"
+ if (length(self$trees) == 0) {
+ stop("Model has not been fitted yet")
+ }
+
+ # Count feature usage and gain in splits
+ n_features <- ncol(X) # Assumes X from last fit
+ importance <- rep(0, n_features)
+ names(importance) <- paste0("Feature_", 1:n_features)
+
+ calculate_tree_importance <- function(node, depth = 0) {
+ if (is.null(node) || node$is_leaf()) {
+ return(NULL)
+ }
+
+ # Add importance score based on depth (earlier splits are more important)
+ feature_idx <- node$feature
+ importance[feature_idx] <<- importance[feature_idx] + 1 / (depth + 1)
+
+ calculate_tree_importance(node$left, depth + 1)
+ calculate_tree_importance(node$right, depth + 1)
+ }
+
+ # Calculate importance for each tree
+ for (tree in self$trees) {
+ calculate_tree_importance(tree$root)
+ }
+
+ # Normalize importance scores
+ if (sum(importance) > 0) {
+ importance <- importance / sum(importance)
+ }
+
+ # Sort and return as named vector
+ importance <- sort(importance, decreasing = TRUE)
+ return(importance)
+ }
+ )
+)
+
+# Demonstration and testing
+demonstrate_gradient_boosting <- function() {
+ cat("=== Gradient Boosting Algorithm Demo ===\n\n")
+
+ # Generate synthetic dataset
+ set.seed(42)
+ n_samples <- 200
+
+ cat("Example 1: Non-linear regression problem\n")
+ cat("Generating synthetic data...\n\n")
+
+ # Create non-linear relationship
+ X <- matrix(runif(n_samples, -3, 3), ncol = 1)
+ y <- sin(X[, 1]) + 0.3 * X[, 1]^2 + rnorm(n_samples, 0, 0.1)
+
+ # Split into train and test
+ train_idx <- sample(1:n_samples, size = 0.8 * n_samples)
+ test_idx <- setdiff(1:n_samples, train_idx)
+
+ X_train <- matrix(X[train_idx, ], ncol = 1)
+ y_train <- y[train_idx]
+ X_test <- matrix(X[test_idx, ], ncol = 1)
+ y_test <- y[test_idx]
+
+ # Train model
+ cat("Training Gradient Boosting model...\n")
+ model <- GradientBoostingRegressor$new(
+ n_estimators = 50,
+ learning_rate = 0.1,
+ max_depth = 3
+ )
+ model$fit(X_train, y_train, verbose = TRUE)
+
+ # Make predictions
+ cat("\nEvaluating model...\n")
+ train_pred <- model$predict(X_train)
+ test_pred <- model$predict(X_test)
+
+ # Calculate metrics
+ train_rmse <- sqrt(mean((y_train - train_pred)^2))
+ test_rmse <- sqrt(mean((y_test - test_pred)^2))
+ train_r2 <- model$score(X_train, y_train)
+ test_r2 <- model$score(X_test, y_test)
+
+ cat(sprintf("\nResults:\n"))
+ cat(sprintf("Train RMSE: %.4f | Train R²: %.4f\n", train_rmse, train_r2))
+ cat(sprintf("Test RMSE: %.4f | Test R²: %.4f\n\n", test_rmse, test_r2))
+
+ # Example 2: Multi-feature problem
+ cat("Example 2: Multi-feature regression\n")
+ cat("Generating multi-dimensional data...\n")
+
+ X_multi <- matrix(rnorm(n_samples * 3), ncol = 3)
+ y_multi <- 2 * X_multi[, 1] - 3 * X_multi[, 2] +
+ 0.5 * X_multi[, 3]^2 + rnorm(n_samples, 0, 0.5)
+
+ # Train-test split
+ X_train_multi <- X_multi[train_idx, ]
+ y_train_multi <- y_multi[train_idx]
+ X_test_multi <- X_multi[test_idx, ]
+ y_test_multi <- y_multi[test_idx]
+
+ # Train model
+ model2 <- GradientBoostingRegressor$new(
+ n_estimators = 50,
+ learning_rate = 0.1,
+ max_depth = 4
+ )
+ model2$fit(X_train_multi, y_train_multi)
+
+ # Evaluate
+ test_rmse2 <- sqrt(mean((y_test_multi - model2$predict(X_test_multi))^2))
+ test_r2_2 <- model2$score(X_test_multi, y_test_multi)
+
+ cat(sprintf("Test RMSE: %.4f | Test R²: %.4f\n\n", test_rmse2, test_r2_2))
+
+ # Example 3: Hyperparameter comparison
+ cat("Example 3: Impact of learning rate\n")
+ learning_rates <- c(0.01, 0.1, 0.5)
+
+ for (lr in learning_rates) {
+ model_lr <- GradientBoostingRegressor$new(
+ n_estimators = 30,
+ learning_rate = lr,
+ max_depth = 3
+ )
+ model_lr$fit(X_train, y_train, verbose = FALSE)
+ test_rmse_lr <- sqrt(mean((y_test - model_lr$predict(X_test))^2))
+ cat(sprintf("Learning rate %.2f - Test RMSE: %.4f\n", lr, test_rmse_lr))
+ }
+
+ cat("\n=== Demo Complete ===\n")
+}
+
+# Run demonstration
+if (!interactive()) {
+ demonstrate_gradient_boosting()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/amicable_numbers.r b/Desktop/open-source/R/mathematics/amicable_numbers.r
new file mode 100644
index 00000000..8b21590e
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/amicable_numbers.r
@@ -0,0 +1,34 @@
+are_numbers_amicable <- function(x, y) {
+
+ #' @description Checks if numbers passed as a parameter are amicable numbers.
+ #' @param x first number to check
+ #' @param y second number to check
+ #' @usage are_numbers_amicable(x, y)
+ #' @details Amicable numbers are two different natural numbers related
+ #' in such a way that the sum of the proper divisors of each
+ #' is equal to the other number.
+ #' @references https://en.wikipedia.org/wiki/Amicable_numbers
+
+  # Amicable numbers must be two different numbers
+  if (x == y) return(FALSE)
+
+  x_divisors_sum <- get_sum_of_divisors(x)
+  y_divisors_sum <- get_sum_of_divisors(y)
+
+  return((x_divisors_sum == y) && (y_divisors_sum == x))
+}
+
+get_sum_of_divisors <- function(n) {
+  total <- 0
+  # seq_len(n - 1) is empty when n == 1, so the sum of proper divisors is 0
+  for (i in seq_len(n - 1)) {
+    if (n %% i == 0) {
+      total <- total + i
+    }
+  }
+
+  return(total)
+}
+
+result <- are_numbers_amicable(220, 284)
+print(result) # expected true
+
+result <- are_numbers_amicable(15, 100)
+print(result) # expected false
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/armstrong_number.r b/Desktop/open-source/R/mathematics/armstrong_number.r
new file mode 100644
index 00000000..25f6f0eb
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/armstrong_number.r
@@ -0,0 +1,27 @@
+# Armstrong Number (also known as narcissistic number) checking in R
+
+isArmstrong <- function(integer){
+
+ digitAmount <- if(integer > 0){
+ floor(log10(integer))+1
+ } else if(integer == 0){
+ 1
+ } else return(FALSE)
+
+ temp <- integer
+ Armstrong <- 0
+
+ while(temp > 0){
+ Armstrong <- Armstrong + (temp %% 10)^digitAmount
+ temp <- floor(temp / 10)
+ }
+
+ if(Armstrong == integer){
+ return(TRUE)
+ } else return(FALSE)
+
+}
+
+isArmstrong(5) #returns TRUE
+isArmstrong(29) #returns FALSE
+isArmstrong(370) #returns TRUE
diff --git a/Desktop/open-source/R/mathematics/bisection_method.r b/Desktop/open-source/R/mathematics/bisection_method.r
new file mode 100644
index 00000000..2c06126b
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/bisection_method.r
@@ -0,0 +1,58 @@
+# Bisection method
+library(roxygen2)
+library(docstring)
+
+NMAX = 100 # maximum number of iterations
+EPSILON = 1e-4 # a small positive quantity
+
+
+func <- function(x) {
+ #' Continuous function for which we want to find the root
+ #' @param x Real input variable
+ #' @returns The evaluation result of the function using the input value
+ x^3 + 2.0*x - 10.0
+}
+
+
+bisection <- function(x_left, x_right, tolerance) {
+ #' Bisection method is a root-finding method that applies to any continuous
+ #' function for which one knows two values with opposite signs.
+ #' @description Finds the root value of a continuous function.
+ #' @param x_left Float
+ #' @param x_right Float
+ #' @param tolerance Float
+ #' @returns Root value
+ #' @usage bisection(x_left, x_right, tolerance)
+ #' @details The method consists of repeatedly bisecting the interval defined
+ #' by the two values and then selecting the subinterval in which the function
+ #' changes sign, and therefore must contain a root. It is a very simple and
+ #' robust method, but it is also relatively slow. Because of this, it is
+ #' often used to obtain a rough approximation to a solution which is then
+ #' used as a starting point for more rapidly converging methods.
+ #' @references https://en.wikipedia.org/wiki/Bisection_method
+ #' @author Aybars Nazlica https://github.com/aybarsnazlica
+
+  # The method requires a sign change on the initial interval
+  if (sign(func(x_left)) == sign(func(x_right))) {
+    stop("func(x_left) and func(x_right) must have opposite signs")
+  }
+
+  n = 1 # step counter
+
+ while(n <= NMAX) {
+ middle = (x_left + x_right) / 2 # midpoint
+ error = middle - x_left
+
+ if (abs(func(middle)) < EPSILON || error < tolerance) {
+ return(middle)
+ }
+
+ if (prod(sign(c(func(middle), func(x_left)))) > 0) { # if sign is positive
+ x_left = middle # new lower endpoint
+ } else {
+ x_right = middle # new upper endpoint
+ }
+
+ n = n + 1 # increase step counter
+ }
+ print("Maximum number of steps exceeded!") # method failed
+}
+
+
+print(abs(bisection(1.0, 2.0, 1e-3) - 1.84668) < EPSILON) # returns TRUE
+# A call such as bisection(100.0, 250.0, 1e-3) stops with an error, because
+# func(100) and func(250) are both positive, so the interval contains no root.
diff --git a/Desktop/open-source/R/mathematics/catalan_numbers.r b/Desktop/open-source/R/mathematics/catalan_numbers.r
new file mode 100644
index 00000000..c9e44a27
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/catalan_numbers.r
@@ -0,0 +1,110 @@
+# Catalan Numbers Implementation in R
+#
+# Catalan numbers form a sequence of natural numbers that occur in various counting problems.
+# The nth Catalan number is given by the formula: C(n) = (2n)! / ((n+1)! * n!)
+# They appear in problems like counting binary trees, valid parentheses combinations,
+# paths in a grid, and polygon triangulations.
+#
+# Time Complexity: O(n^2) for the dynamic-programming approach, O(n) for the direct formula
+# Space Complexity: O(n) for the dynamic-programming table
+
+# Function to calculate nth Catalan number using dynamic programming
+# @param n: Non-negative integer for which to calculate Catalan number
+# @return: The nth Catalan number
+catalan_dp <- function(n) {
+ # Base cases
+ if (n <= 1) {
+ return(1)
+ }
+
+ # Initialize dp table
+ catalan <- numeric(n + 1)
+ catalan[1] <- 1 # C(0) = 1
+ catalan[2] <- 1 # C(1) = 1
+
+ # Fill the table using the recurrence relation:
+ # C(n) = sum(C(i) * C(n-1-i)) for i from 0 to n-1
+ for (i in 2:n) {
+ catalan[i + 1] <- 0
+ for (j in 0:(i - 1)) {
+ catalan[i + 1] <- catalan[i + 1] + catalan[j + 1] * catalan[i - j]
+ }
+ }
+
+ return(catalan[n + 1])
+}
+
+# Function to calculate nth Catalan number using direct formula
+# @param n: Non-negative integer for which to calculate Catalan number
+# @return: The nth Catalan number
+catalan_formula <- function(n) {
+ if (n <= 1) {
+ return(1)
+ }
+
+ # Use the formula: C(n) = (2n)! / ((n+1)! * n!)
+ # Simplified to: C(n) = (2n choose n) / (n+1)
+ result <- 1
+
+ # Calculate using the iterative formula to avoid large factorials
+ for (i in 0:(n - 1)) {
+ result <- result * (n + i + 1) / (i + 1)
+ }
+
+ return(result / (n + 1))
+}
+
+# Function to generate first n Catalan numbers
+# @param n: Number of Catalan numbers to generate
+# @return: Vector containing first n Catalan numbers
+first_n_catalan <- function(n) {
+ if (n <= 0) {
+ return(numeric(0))
+ }
+
+ result <- numeric(n)
+
+ for (i in 1:n) {
+ result[i] <- catalan_dp(i - 1) # Generate C(0) to C(n-1)
+ }
+
+ return(result)
+}
+
+# Function to find applications of Catalan numbers
+# @param n: The index for which to show applications
+# @return: List of interpretations of the nth Catalan number
+catalan_applications <- function(n) {
+ cat_n <- catalan_dp(n)
+
+ applications <- list(
+ value = cat_n,
+ interpretations = c(
+ paste("Number of ways to arrange", n, "pairs of parentheses"),
+ paste("Number of full binary trees with", n + 1, "leaves"),
+ paste("Number of ways to triangulate a convex polygon with", n + 2, "vertices"),
+      paste0("Number of monotonic lattice paths from (0,0) to (", n, ",", n, ") not crossing y = x"),
+      paste("Number of ways to join", 2 * n, "points on a circle with", n, "non-crossing chords")
+ )
+ )
+
+ return(applications)
+}
+
+# Example usage:
+# # Calculate specific Catalan numbers
+# print(paste("5th Catalan number (DP):", catalan_dp(5)))
+# print(paste("5th Catalan number (Formula):", catalan_formula(5)))
+#
+# # Generate first 10 Catalan numbers
+# first_10 <- first_n_catalan(10)
+# print("First 10 Catalan numbers:")
+# print(first_10)
+#
+# # Show applications of 4th Catalan number
+# apps <- catalan_applications(4)
+# print(paste("C(4) =", apps$value))
+# print("Applications:")
+# for (app in apps$interpretations) {
+# print(app)
+# }
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/euclidean_distance.r b/Desktop/open-source/R/mathematics/euclidean_distance.r
new file mode 100644
index 00000000..08af7ce2
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/euclidean_distance.r
@@ -0,0 +1,8 @@
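+# Euclidean distance between two numeric vectors x and y of equal length:
+# sqrt(sum((x - y)^2))
+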
+euclideanDistance <- function(x, y) {
+ return(sqrt(sum((x - y)^2)))
+}
+
+set.seed(1)
+x <- rnorm(1000)
+y <- runif(1000)
+print(euclideanDistance(x, y))
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/extended_euclidean_algorithm.r b/Desktop/open-source/R/mathematics/extended_euclidean_algorithm.r
new file mode 100644
index 00000000..0594abeb
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/extended_euclidean_algorithm.r
@@ -0,0 +1,282 @@
+# Extended Euclidean Algorithm
+#
+# The Extended Euclidean Algorithm not only finds the Greatest Common Divisor (GCD)
+# of two integers a and b, but also finds integers x and y such that:
+# ax + by = gcd(a, b) (Bézout's identity)
+#
+# This is particularly useful in modular arithmetic, RSA cryptography, and
+# finding modular multiplicative inverses.
+#
+# Time Complexity: O(log(min(a, b)))
+# Space Complexity: O(log(min(a, b))) due to recursion, O(1) for iterative version
+#
+# Input: Two integers a and b
+# Output: A list containing gcd, and coefficients x, y such that ax + by = gcd(a, b)
+
+# Recursive implementation of Extended Euclidean Algorithm
+extended_gcd_recursive <- function(a, b) {
+ # Base case
+ if (b == 0) {
+ return(list(
+ gcd = abs(a),
+ x = sign(a), # Coefficient for a
+ y = 0 # Coefficient for b
+ ))
+ }
+
+ # Recursive call
+ result <- extended_gcd_recursive(b, a %% b)
+
+ # Update coefficients using the relation:
+ # gcd(a, b) = gcd(b, a mod b)
+ # If gcd(b, a mod b) = x1*b + y1*(a mod b)
+ # Then gcd(a, b) = y1*a + (x1 - floor(a/b)*y1)*b
+ x <- result$y
+ y <- result$x - (a %/% b) * result$y
+
+ return(list(
+ gcd = result$gcd,
+ x = x,
+ y = y
+ ))
+}
+
+# Iterative implementation of Extended Euclidean Algorithm
+extended_gcd_iterative <- function(a, b) {
+  # Work on absolute values and restore signs at the end:
+  # if |a|*x + |b|*y = g, then a*(sign(a)*x) + b*(sign(b)*y) = g
+  sign_a <- if (a < 0) -1 else 1
+  sign_b <- if (b < 0) -1 else 1
+  a <- abs(a)
+  b <- abs(b)
+
+  # Initialize coefficients
+  old_x <- 1; x <- 0
+  old_y <- 0; y <- 1
+
+  while (b != 0) {
+    quotient <- a %/% b
+
+    # Update a and b
+    temp <- b
+    b <- a %% b
+    a <- temp
+
+    # Update x coefficients
+    temp <- x
+    x <- old_x - quotient * x
+    old_x <- temp
+
+    # Update y coefficients
+    temp <- y
+    y <- old_y - quotient * y
+    old_y <- temp
+  }
+
+  return(list(
+    gcd = a,
+    x = old_x * sign_a,
+    y = old_y * sign_b
+  ))
+}
+
+# Function to find modular multiplicative inverse
+modular_inverse <- function(a, m) {
+ #' Find modular multiplicative inverse of a modulo m
+ #' Returns x such that (a * x) ≡ 1 (mod m)
+ #' Only exists if gcd(a, m) = 1
+
+ result <- extended_gcd_iterative(a, m)
+
+ if (result$gcd != 1) {
+ return(NULL) # Inverse doesn't exist
+ }
+
+  # Reduce the coefficient modulo m; with a positive modulus, R's %% already
+  # returns a value in [0, m - 1]
+  inverse <- result$x %% m
+
+  return(inverse)
+}
+
+# Function to solve linear Diophantine equation ax + by = c
+solve_diophantine <- function(a, b, c) {
+ #' Solve the linear Diophantine equation ax + by = c
+ #' Returns NULL if no integer solutions exist
+ #' Returns one particular solution and the general solution pattern
+
+ result <- extended_gcd_iterative(a, b)
+ gcd_ab <- result$gcd
+
+ # Check if solution exists
+ if (c %% gcd_ab != 0) {
+ return(NULL) # No integer solutions exist
+ }
+
+ # Scale the coefficients
+ scale <- c / gcd_ab
+ x0 <- result$x * scale
+ y0 <- result$y * scale
+
+ return(list(
+ particular_solution = list(x = x0, y = y0),
+ general_solution = list(
+ x_formula = paste0(x0, " + ", b/gcd_ab, "*t"),
+ y_formula = paste0(y0, " - ", a/gcd_ab, "*t"),
+ description = "where t is any integer"
+ ),
+ verification = a * x0 + b * y0 == c
+ ))
+}
+
+# Function to find all solutions in a given range
+find_diophantine_solutions_in_range <- function(a, b, c, x_min, x_max, y_min, y_max) {
+  #' Find all integer solutions to ax + by = c in the given ranges
+  #' (the t-range computation below assumes positive coefficients a and b, as in the examples)
+
+ dioph_result <- solve_diophantine(a, b, c)
+ if (is.null(dioph_result)) {
+ return(NULL)
+ }
+
+ x0 <- dioph_result$particular_solution$x
+ y0 <- dioph_result$particular_solution$y
+ gcd_ab <- extended_gcd_iterative(a, b)$gcd
+
+ b_coeff <- b / gcd_ab
+ a_coeff <- a / gcd_ab
+
+ # Find range of t values
+ t_min_x <- ceiling((x_min - x0) / b_coeff)
+ t_max_x <- floor((x_max - x0) / b_coeff)
+ t_min_y <- ceiling((y0 - y_max) / a_coeff)
+ t_max_y <- floor((y0 - y_min) / a_coeff)
+
+ t_min <- max(t_min_x, t_min_y)
+ t_max <- min(t_max_x, t_max_y)
+
+ if (t_min > t_max) {
+ return(data.frame(x = integer(0), y = integer(0), t = integer(0)))
+ }
+
+ # Generate solutions
+ t_values <- t_min:t_max
+ x_values <- x0 + b_coeff * t_values
+ y_values <- y0 - a_coeff * t_values
+
+ return(data.frame(x = x_values, y = y_values, t = t_values))
+}
+
+# Example usage and testing
+cat("=== Extended Euclidean Algorithm ===\n")
+
+# Test basic extended GCD
+cat("Testing Extended GCD with a=240, b=46:\n")
+result1 <- extended_gcd_iterative(240, 46)
+cat("GCD:", result1$gcd, "\n")
+cat("Coefficients: x =", result1$x, ", y =", result1$y, "\n")
+cat("Verification:", 240 * result1$x + 46 * result1$y, "= GCD:", result1$gcd, "\n")
+cat("Check:", 240 * result1$x + 46 * result1$y == result1$gcd, "\n\n")
+
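+# Negative inputs are handled as well; the returned coefficients still satisfy
+# Bezout's identity a*x + b*y = gcd(a, b)
+result_neg <- extended_gcd_iterative(-240, 46)
+cat("Negative input check: -240 *", result_neg$x, "+ 46 *", result_neg$y, "=",
+    -240 * result_neg$x + 46 * result_neg$y, "(gcd =", result_neg$gcd, ")\n\n")
+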
+# Compare recursive and iterative methods
+cat("Comparing recursive vs iterative methods:\n")
+result_rec <- extended_gcd_recursive(240, 46)
+result_iter <- extended_gcd_iterative(240, 46)
+cat("Recursive - GCD:", result_rec$gcd, ", x:", result_rec$x, ", y:", result_rec$y, "\n")
+cat("Iterative - GCD:", result_iter$gcd, ", x:", result_iter$x, ", y:", result_iter$y, "\n")
+cat("Results match:",
+ result_rec$gcd == result_iter$gcd &&
+ result_rec$x == result_iter$x &&
+ result_rec$y == result_iter$y, "\n\n")
+
+# Test modular multiplicative inverse
+cat("=== Modular Multiplicative Inverse ===\n")
+cat("Finding inverse of 7 modulo 26:\n")
+inv <- modular_inverse(7, 26)
+if (!is.null(inv)) {
+ cat("7^(-1) ≡", inv, "(mod 26)\n")
+ cat("Verification: 7 *", inv, "≡", (7 * inv) %% 26, "(mod 26)\n")
+} else {
+ cat("Inverse does not exist\n")
+}
+
+# Test case where inverse doesn't exist
+cat("\nTesting case where inverse doesn't exist (a=6, m=9):\n")
+inv2 <- modular_inverse(6, 9)
+if (is.null(inv2)) {
+ cat("Inverse does not exist (as expected, since gcd(6,9) = 3 ≠ 1)\n")
+}
+
+# Test solving Diophantine equations
+cat("\n=== Linear Diophantine Equations ===\n")
+cat("Solving 25x + 9y = 7:\n")
+dioph1 <- solve_diophantine(25, 9, 7)
+if (!is.null(dioph1)) {
+ cat("Particular solution: x =", dioph1$particular_solution$x,
+ ", y =", dioph1$particular_solution$y, "\n")
+ cat("General solution:\n")
+ cat(" x =", dioph1$general_solution$x_formula, "\n")
+ cat(" y =", dioph1$general_solution$y_formula, "\n")
+ cat(" ", dioph1$general_solution$description, "\n")
+ cat("Verification:", dioph1$verification, "\n")
+} else {
+ cat("No integer solutions exist\n")
+}
+
+# Test equation with no solutions
+cat("\nSolving 6x + 9y = 10 (should have no integer solutions):\n")
+dioph2 <- solve_diophantine(6, 9, 10)
+if (is.null(dioph2)) {
+ cat("No integer solutions exist (as expected, since gcd(6,9)=3 does not divide 10)\n")
+}
+
+# Find solutions in a specific range
+cat("\n=== Solutions in Range ===\n")
+cat("Finding solutions to 25x + 9y = 7 where -10 ≤ x ≤ 10 and -10 ≤ y ≤ 10:\n")
+range_solutions <- find_diophantine_solutions_in_range(25, 9, 7, -10, 10, -10, 10)
+if (!is.null(range_solutions) && nrow(range_solutions) > 0) {
+ print(range_solutions)
+
+ # Verify solutions
+ cat("Verification of solutions:\n")
+ for (i in 1:nrow(range_solutions)) {
+ x <- range_solutions$x[i]
+ y <- range_solutions$y[i]
+ result_check <- 25 * x + 9 * y
+ cat("25 *", x, "+ 9 *", y, "=", result_check,
+ "(", if(result_check == 7) "✓" else "✗", ")\n")
+ }
+} else {
+ cat("No solutions found in the specified range\n")
+}
+
+# Applications example
+cat("\n=== Practical Applications ===\n")
+cat("Example: Making change with 3-cent and 5-cent coins\n")
+cat("Problem: Can we make exactly 14 cents? Find all ways.\n")
+change_problem <- solve_diophantine(3, 5, 14)
+if (!is.null(change_problem)) {
+ cat("Yes! Particular solution:",
+ change_problem$particular_solution$x, "three-cent coins and",
+ change_problem$particular_solution$y, "five-cent coins\n")
+
+ # Find all non-negative solutions
+ solutions_range <- find_diophantine_solutions_in_range(3, 5, 14, 0, 10, 0, 10)
+ if (nrow(solutions_range) > 0) {
+ cat("All valid ways to make 14 cents:\n")
+ for (i in 1:nrow(solutions_range)) {
+ x <- solutions_range$x[i]
+ y <- solutions_range$y[i]
+ if (x >= 0 && y >= 0) {
+ cat(x, "× 3¢ +", y, "× 5¢ = 14¢\n")
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/factorial.r b/Desktop/open-source/R/mathematics/factorial.r
new file mode 100644
index 00000000..409999c5
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/factorial.r
@@ -0,0 +1,12 @@
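+# Computes the factorial n! of a non-negative integer n using prod(1:n);
+# Fact(0) returns 1 and negative input raises an error.
+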
+Fact <- function(n){
+ if(n < 0){
+    stop("n must be a non-negative integer")
+ } else if(n == 0){
+ return(1)
+ } else {
+ return(prod(1:n))
+ }
+}
+
+Fact(5)
+Fact(6)
diff --git a/Desktop/open-source/R/mathematics/fibonacci.r b/Desktop/open-source/R/mathematics/fibonacci.r
new file mode 100644
index 00000000..6dfdbf21
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/fibonacci.r
@@ -0,0 +1,14 @@
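+# Returns the nth Fibonacci number (1-indexed: Fibonacci(1) = Fibonacci(2) = 1)
+# using naive recursion, which takes exponential time; fine for small n.
+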
+Fibonacci <- function(n)
+{
+  if(n == 1 || n == 2)
+ {
+ return(1)
+ }
+ else
+ {
+ return(Fibonacci(n-1) + Fibonacci(n - 2))
+ }
+}
+
+Fibonacci(1)
+Fibonacci(11)
diff --git a/Desktop/open-source/R/mathematics/first_n_fibonacci.r b/Desktop/open-source/R/mathematics/first_n_fibonacci.r
new file mode 100644
index 00000000..b1137781
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/first_n_fibonacci.r
@@ -0,0 +1,25 @@
+
+First_n_Fibonacci <- function(n)
+{
+ # creating empty array of size n
+ Fibonacci <- numeric(n)
+
+ # assigning first 2 fibonacci values
+ Fibonacci[1] <- 0
+ Fibonacci[2] <- 1
+
+  # finding the remaining fibonacci numbers using a for loop ranging from 3 to n
+  # (skipped when n < 3, because 3:n would count backwards and corrupt the result)
+  if (n >= 3)
+  {
+    for (i in 3:n)
+    {
+      Fibonacci[i] <- Fibonacci[i - 2] + Fibonacci[i - 1]
+    }
+  }
+
+ # printing the result
+ print(Fibonacci)
+}
+
+First_n_Fibonacci(10) #returns 0 1 1 2 3 5 8 13 21 34
+First_n_Fibonacci(15) #returns 0 1 1 2 3 5 8 13 21 34 55 89 144 233 377
+
+
+
diff --git a/Desktop/open-source/R/mathematics/greatest_common_divisor.r b/Desktop/open-source/R/mathematics/greatest_common_divisor.r
new file mode 100644
index 00000000..9c2b94c1
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/greatest_common_divisor.r
@@ -0,0 +1,24 @@
+# GCD Calculation using Euclidean Algorithm
+find_gcd <- function(a, b) {
+
+ #' @description Computes the Greatest Common Divisor (GCD) of two integers.
+ #' @param a Integer
+ #' @param b Integer
+ #' @usage find_gcd(a, b)
+ #' @details This function uses the Euclidean algorithm to find the GCD.
+ #' GCD is essential in various mathematical contexts, particularly in
+ #' simplification of fractions and number theory applications.
+ #' @references https://en.wikipedia.org/wiki/Euclidean_algorithm
+
+ while (b != 0) {
+ temp <- b
+ b <- a %% b
+ a <- temp
+ }
+
+ return(abs(a))
+}
+
+# Examples
+print(find_gcd(48, 18)) # expected 6
+print(find_gcd(54, 24)) # expected 6
diff --git a/Desktop/open-source/R/mathematics/josephus_problem.r b/Desktop/open-source/R/mathematics/josephus_problem.r
new file mode 100644
index 00000000..7bbfad50
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/josephus_problem.r
@@ -0,0 +1,27 @@
+find_winner <- function(n, k) {
+
+ #' @description Finds the winner in the Josephus problem
+ #' @param n The number of people in the initial circle
+ #' @param k The count of each step
+ #' @usage find_winner(n, k)
+ #' @details In computer science and mathematics, the Josephus problem
+ #' (or Josephus permutation) is a theoretical problem related to a certain
+ #' counting-out game. Such games are used to pick out a person from a group.
+ #' @references https://en.wikipedia.org/wiki/Josephus_problem
+
+  if (k > n) stop("The group size n must be at least as large as the step k")
+
+ winner <- 0
+
+ for (i in 1:n) {
+ winner <- (winner + k) %% i
+ }
+
+ return(winner + 1)
+}
+
+result <- find_winner(11, 2)
+print(result) # expected 7
+
+result <- find_winner(5, 2)
+print(result) # expected 3
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/least_common_multiple.r b/Desktop/open-source/R/mathematics/least_common_multiple.r
new file mode 100644
index 00000000..d440f29c
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/least_common_multiple.r
@@ -0,0 +1,27 @@
+# LCM Calculation
+find_lcm <- function(a, b) {
+
+ #' @description Computes the Least Common Multiple (LCM) of two integers.
+ #' @param a Integer
+ #' @param b Integer
+ #' @usage find_lcm(a, b)
+ #' @details This function uses the relationship between GCD and LCM,
+ #' i.e., LCM(a, b) = |a * b| / GCD(a, b).
+ #' LCM is useful in fraction operations and periodicity calculations.
+ #' @references https://en.wikipedia.org/wiki/Least_common_multiple
+
+ gcd <- function(x, y) {
+ while (y != 0) {
+ temp <- y
+ y <- x %% y
+ x <- temp
+ }
+ return(abs(x))
+ }
+
+ return(abs(a * b) / gcd(a, b))
+}
+
+# Examples
+print(find_lcm(48, 18)) # expected 144
+print(find_lcm(54, 24)) # expected 216
diff --git a/Desktop/open-source/R/mathematics/modular_exponentiation.r b/Desktop/open-source/R/mathematics/modular_exponentiation.r
new file mode 100644
index 00000000..49a0a417
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/modular_exponentiation.r
@@ -0,0 +1,70 @@
+#' Computes modular exponentiation using fast binary exponentiation.
+#'
+#' @param base Numeric or integer base.
+#' @param exp Non-negative integer exponent.
+#' @param mod Optional positive integer modulus. If `NULL`, computes
+#' \eqn{base^{exp}} without modulus (may overflow for large values).
+#'
+#' @return If `mod` is provided, returns an integer in \[0, mod - 1\] equal to
+#' \eqn{(base^{exp}) \bmod mod}. If `mod` is `NULL`, returns \eqn{base^{exp}}.
+#'
+#' @details
+#' Implements **binary (fast) exponentiation** running in \eqn{O(\log exp)} time
+#' and \eqn{O(1)} extra space.
+#' - When `mod` is provided, intermediate values are reduced modulo `mod` to
+#' avoid overflow and keep numbers bounded.
+#' - Negative bases are handled correctly in modular mode by normalizing
+#' \code{base <- (base \%\% mod + mod) \%\% mod}.
+#' - Negative exponents are **not supported** (would require modular inverse).
+#'
+#' @examples
+#' # 2^10 = 1024, and 1024 mod 1000 = 24
+#' modular_exponentiation(2, 10, 1000)
+#' # [1] 24
+#' modular_exponentiation(3, 0, 7) # 1
+#' modular_exponentiation(5, 3) # 125 (no modulus)
+#' modular_exponentiation(-2, 5, 13) # 6 because (-2)^5 = -32 ≡ 6 (mod 13)
+#'
+#' @seealso \code{\link[base]{%%}} for modulus operator.
+#'
+#' @export
+modular_exponentiation <- function(base, exp, mod = NULL) {
+ # validate exponent
+ if (length(exp) != 1 || is.na(exp) || exp < 0 || exp != as.integer(exp)) {
+ stop("`exp` must be a single non-negative integer.")
+ }
+ exp <- as.integer(exp)
+
+ # no modulus: compute power with fast exponentiation (may overflow for large numbers)
+ if (is.null(mod)) {
+ result <- 1
+ b <- base
+ e <- exp
+ while (e > 0) {
+ if (e %% 2L == 1L) result <- result * b
+ b <- b * b
+ e <- e %/% 2L
+ }
+ return(result)
+ }
+
+ # validate modulus
+ if (length(mod) != 1 || is.na(mod) || mod <= 0 || mod != as.integer(mod)) {
+ stop("`mod` must be a single positive integer when provided.")
+ }
+ mod <- as.integer(mod)
+
+ # normalize base into [0, mod-1]
+ b <- ((base %% mod) + mod) %% mod
+ result <- 1L
+ e <- exp
+
+ while (e > 0L) {
+ if (e %% 2L == 1L) {
+ result <- (result * b) %% mod
+ }
+ b <- (b * b) %% mod
+ e <- e %/% 2L
+ }
+ result
+}
diff --git a/Desktop/open-source/R/mathematics/newton_raphson_method.r b/Desktop/open-source/R/mathematics/newton_raphson_method.r
new file mode 100644
index 00000000..4a226aa8
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/newton_raphson_method.r
@@ -0,0 +1,16 @@
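+# Newton-Raphson root finding: starting from x0, iterate x <- x - f(x) / fprime(x)
+# until successive iterates differ by less than tol or max_iter is reached.
+# Returns NA with a warning if the derivative is (numerically) zero.
+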
+newton_raphson <- function(f, fprime, x0, tol = 1e-6, max_iter = 100) {
+ x <- x0
+ for (i in 1:max_iter) {
+ deriv <- fprime(x)
+ if (abs(deriv) < .Machine$double.eps) {
+ warning("Derivative is zero. Newton-Raphson method fails.")
+ return(NA)
+ }
+ x_new <- x - f(x) / deriv
+ if (abs(x_new - x) < tol) {
+ return(x_new)
+ }
+ x <- x_new
+ }
+ return(x)
+}
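+
+# Example (illustrative): approximate sqrt(2) as the positive root of f(x) = x^2 - 2
+f <- function(x) x^2 - 2
+fprime <- function(x) 2 * x
+print(newton_raphson(f, fprime, x0 = 1)) # expected output: approximately 1.414214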
diff --git a/Desktop/open-source/R/mathematics/perfect_number.r b/Desktop/open-source/R/mathematics/perfect_number.r
new file mode 100644
index 00000000..3b52e506
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/perfect_number.r
@@ -0,0 +1,39 @@
+is_perfect_number <- function(n) {
+
+ #' @description Checks if number passed as a parameter is a perfect number.
+ #' @param n number to check
+ #' @usage is_perfect_number(n)
+ #' @details In number theory, a perfect number is a positive integer that
+ #' is equal to the sum of its positive divisors, excluding the number itself.
+ #' For instance, 6 has divisors 1, 2 and 3 (excluding itself)
+ #' and 1 + 2 + 3 = 6, so 6 is a perfect number.
+ #' @references https://en.wikipedia.org/wiki/Perfect_number
+
+  if (n < 1) stop("Parameter n must be a positive integer")
+
+  sum_of_divisors <- 0
+
+  # seq_len(n - 1) is empty for n = 1, so 1 is correctly reported as not perfect
+  for (i in seq_len(n - 1)) {
+    if (n %% i == 0) {
+      sum_of_divisors <- sum_of_divisors + i
+    }
+  }
+
+ return(sum_of_divisors == n)
+}
+
+result <- is_perfect_number(4)
+print(result) # expected false
+
+result <- is_perfect_number(5)
+print(result) # expected false
+
+result <- is_perfect_number(6)
+print(result) # expected true
+
+result <- is_perfect_number(7)
+print(result) # expected false
+
+result <- is_perfect_number(28)
+print(result) # expected true
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/perfect_square.r b/Desktop/open-source/R/mathematics/perfect_square.r
new file mode 100644
index 00000000..cbdd3772
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/perfect_square.r
@@ -0,0 +1,7 @@
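+# Checks whether each element of x is a perfect square by testing if sqrt(x)
+# is an integer; intended for non-negative integer inputs.
+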
+perfectSquare <- function(x){
+ return(floor(sqrt(x)) == sqrt(x))
+}
+
+set.seed(1)
+inputs <- sample(1:100, 10)
+perfectSquare(inputs)
\ No newline at end of file
diff --git a/Desktop/open-source/R/mathematics/permutation_calculation.r b/Desktop/open-source/R/mathematics/permutation_calculation.r
new file mode 100644
index 00000000..c59b7684
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/permutation_calculation.r
@@ -0,0 +1,21 @@
+# Permutation Calculation
+calculate_permutations <- function(n, r) {
+
+ #' @description Calculates the number of permutations of n objects taken r at a time.
+ #' @param n Total number of objects
+ #' @param r Number of objects in each arrangement
+ #' @usage calculate_permutations(n, r)
+ #' @details Permutations represent the number of ways to arrange r objects from n.
+ #' It is calculated as n! / (n - r)! and is widely used in combinatorics.
+ #' @references https://en.wikipedia.org/wiki/Permutation
+
+ if (r > n) stop("r must be less than or equal to n")
+
+ factorial <- function(x) if (x == 0) 1 else prod(1:x)
+
+ return(factorial(n) / factorial(n - r))
+}
+
+# Example
+print(calculate_permutations(5, 3)) # expected 60
+print(calculate_permutations(10, 2)) # expected 90
diff --git a/Desktop/open-source/R/mathematics/pi_monte_carlo.r b/Desktop/open-source/R/mathematics/pi_monte_carlo.r
new file mode 100644
index 00000000..fd3382f1
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/pi_monte_carlo.r
@@ -0,0 +1,10 @@
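+# Monte Carlo estimate of pi: sample points uniformly in the unit square and
+# count the fraction that lands inside the quarter unit circle; that fraction
+# approximates pi / 4, so multiplying by 4 estimates pi.
+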
+estimatePi <- function(numSims){
+ x <- runif(numSims)
+ y <- runif(numSims)
+ inUnitCircle <- as.integer(x^2 + y^2 <= 1)
+ return(4 * sum(inUnitCircle) / numSims)
+}
+
+set.seed(1)
+estimatePi(3000)
+estimatePi(30000)
diff --git a/Desktop/open-source/R/mathematics/prime.r b/Desktop/open-source/R/mathematics/prime.r
new file mode 100644
index 00000000..7e49a256
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/prime.r
@@ -0,0 +1,17 @@
+# Prime Number Checking in R
+isPrime <- function(number) {
+  if (number < 2L) return(FALSE) # 0, 1 and negative numbers are not prime
+  (number == 2L || number == 3L) && return(TRUE)
+  (number %% 2L == 0L || number %% 3L == 0L) && return(FALSE)
+ s <- sqrt(number)
+ k <- 1L
+ while (6L * k - 1L <= s) {
+ if (number %% (6L * k + 1L) == 0L || number %% (6L * k - 1L) == 0L)
+ return(FALSE)
+ k <- k + 1L
+ }
+ TRUE
+}
+
+isPrime(2)
+isPrime(5)
+isPrime(4)
diff --git a/Desktop/open-source/R/mathematics/sieve_of_eratosthenes.r b/Desktop/open-source/R/mathematics/sieve_of_eratosthenes.r
new file mode 100644
index 00000000..14c50d4c
--- /dev/null
+++ b/Desktop/open-source/R/mathematics/sieve_of_eratosthenes.r
@@ -0,0 +1,212 @@
+# Sieve of Eratosthenes Algorithm
+#
+# The Sieve of Eratosthenes is an ancient algorithm for finding all prime numbers
+# up to a given limit. It works by iteratively marking the multiples of each prime
+# starting from 2, and the unmarked numbers that remain are primes.
+#
+# Time Complexity: O(n log log n)
+# Space Complexity: O(n)
+#
+# Input: A positive integer n (the upper limit)
+# Output: A vector of all prime numbers from 2 to n
+
+sieve_of_eratosthenes <- function(n) {
+ # Handle edge cases
+ if (n < 2) {
+ return(integer(0)) # No primes less than 2
+ }
+
+  # Create a logical vector where prime[i] indicates whether i is prime
+  prime <- rep(TRUE, n)
+ prime[1] <- FALSE # 1 is not a prime number
+
+ p <- 2
+ while (p * p <= n) {
+ # If prime[p] is not changed, then it is a prime
+ if (prime[p]) {
+ # Update all multiples of p starting from p^2
+ for (i in seq(p * p, n, by = p)) {
+ prime[i] <- FALSE
+ }
+ }
+ p <- p + 1
+ }
+
+  # Collect all prime numbers (prime[1] was already set to FALSE)
+  primes <- which(prime)
+ return(primes)
+}
+
+# Optimized version that only checks odd numbers after 2
+sieve_of_eratosthenes_optimized <- function(n) {
+ # Handle edge cases
+ if (n < 2) {
+ return(integer(0))
+ }
+ if (n == 2) {
+ return(2)
+ }
+
+ # Start with 2 (the only even prime)
+ primes <- c(2)
+
+ # Create boolean array for odd numbers only (3, 5, 7, ...)
+  # Index i represents the odd number (2*i + 1)
+ size <- (n - 1) %/% 2
+ is_prime <- rep(TRUE, size)
+
+ # Sieve process for odd numbers
+ for (i in 1:size) {
+ if (is_prime[i]) {
+ num <- 2 * i + 1 # Convert index to actual odd number
+
+ # Mark multiples of num starting from num^2
+ if (num * num <= n) {
+ start_idx <- (num * num - 1) %/% 2 # Convert num^2 to index
+ for (j in seq(start_idx, size, by = num)) {
+ if (j <= size) {
+ is_prime[j] <- FALSE
+ }
+ }
+ }
+ }
+ }
+
+ # Collect odd primes
+ odd_primes <- 2 * which(is_prime) + 1
+ primes <- c(primes, odd_primes)
+
+ return(primes)
+}
+
+# Function to count primes up to n (useful for large n)
+count_primes_sieve <- function(n) {
+ if (n < 2) {
+ return(0)
+ }
+
+  prime <- rep(TRUE, n)
+ prime[1] <- FALSE
+
+ p <- 2
+ while (p * p <= n) {
+ if (prime[p]) {
+ for (i in seq(p * p, n, by = p)) {
+ prime[i] <- FALSE
+ }
+ }
+ p <- p + 1
+ }
+
+ return(sum(prime))
+}
+
+# Function to check if a number is prime using trial division (for comparison)
+is_prime_trial_division <- function(n) {
+ if (n <= 1) return(FALSE)
+ if (n <= 3) return(TRUE)
+ if (n %% 2 == 0 || n %% 3 == 0) return(FALSE)
+
+ i <- 5
+ while (i * i <= n) {
+ if (n %% i == 0 || n %% (i + 2) == 0) {
+ return(FALSE)
+ }
+ i <- i + 6
+ }
+ return(TRUE)
+}
+
+# Segmented sieve for finding primes in a range [low, high]
+segmented_sieve <- function(low, high) {
+ # First, find all primes up to sqrt(high)
+ limit <- floor(sqrt(high))
+ primes <- sieve_of_eratosthenes(limit)
+
+ # Create a boolean array for range [low, high]
+ size <- high - low + 1
+ is_prime <- rep(TRUE, size)
+
+ # Mark multiples of each prime in the range
+ for (prime in primes) {
+ # Find the minimum number in [low, high] that is a multiple of prime
+    start <- max(prime * prime, low + (prime - low %% prime) %% prime)
+
+    # Skip this prime if its first multiple in range lies beyond high
+    # (seq() would otherwise fail)
+    if (start > high) next
+
+    # Mark multiples of prime in the range
+ for (j in seq(start, high, by = prime)) {
+ is_prime[j - low + 1] <- FALSE
+ }
+ }
+
+ # Handle the case where low = 1 (1 is not prime)
+ if (low == 1) {
+ is_prime[1] <- FALSE
+ }
+
+ # Collect primes in the range
+ range_primes <- (low:high)[is_prime]
+ return(range_primes)
+}
+
+# Example usage and testing
+cat("=== Sieve of Eratosthenes Algorithm ===\n")
+
+# Test with small number
+cat("Primes up to 30:\n")
+primes_30 <- sieve_of_eratosthenes(30)
+cat(paste(primes_30, collapse = ", "), "\n")
+cat("Count:", length(primes_30), "\n\n")
+
+# Test optimized version
+cat("Optimized sieve - Primes up to 30:\n")
+primes_30_opt <- sieve_of_eratosthenes_optimized(30)
+cat(paste(primes_30_opt, collapse = ", "), "\n")
+cat("Count:", length(primes_30_opt), "\n\n")
+
+# Test with larger number
+cat("Primes up to 100:\n")
+primes_100 <- sieve_of_eratosthenes(100)
+cat("Count:", length(primes_100), "\n")
+cat("First 10 primes:", paste(primes_100[1:10], collapse = ", "), "\n")
+cat("Last 10 primes:", paste(tail(primes_100, 10), collapse = ", "), "\n\n")
+
+# Performance comparison for counting primes
+cat("=== Performance Comparison ===\n")
+n <- 1000
+
+# Count using sieve
+start_time <- Sys.time()
+sieve_count <- count_primes_sieve(n)
+sieve_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+# Count using trial division
+start_time <- Sys.time()
+trial_count <- sum(sapply(2:n, is_prime_trial_division))
+trial_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("Primes up to", n, ":\n")
+cat("Sieve method:", sieve_count, "primes (", sprintf("%.4f", sieve_time), "seconds )\n")
+cat("Trial division:", trial_count, "primes (", sprintf("%.4f", trial_time), "seconds )\n")
+cat("Speedup:", sprintf("%.2f", trial_time / sieve_time), "x\n\n")
+
+# Test segmented sieve
+cat("=== Segmented Sieve Example ===\n")
+cat("Primes between 50 and 100:\n")
+range_primes <- segmented_sieve(50, 100)
+cat(paste(range_primes, collapse = ", "), "\n")
+cat("Count:", length(range_primes), "\n\n")
+
+# Edge cases
+cat("=== Edge Cases ===\n")
+cat("Primes up to 1:", paste(sieve_of_eratosthenes(1), collapse = ", "), "\n")
+cat("Primes up to 2:", paste(sieve_of_eratosthenes(2), collapse = ", "), "\n")
+cat("Primes up to 3:", paste(sieve_of_eratosthenes(3), collapse = ", "), "\n")
+
+# Large example (uncomment for larger tests)
+# cat("\n=== Large Scale Test ===\n")
+# large_n <- 10000
+# start_time <- Sys.time()
+# large_primes <- sieve_of_eratosthenes(large_n)
+# end_time <- Sys.time()
+# cat("Found", length(large_primes), "primes up to", large_n, "\n")
+# cat("Computation time:", as.numeric(end_time - start_time, units = "secs"), "seconds\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/quantitative_finance/black_scholes_option_pricing.r b/Desktop/open-source/R/quantitative_finance/black_scholes_option_pricing.r
new file mode 100644
index 00000000..4979946c
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/black_scholes_option_pricing.r
@@ -0,0 +1,251 @@
+# Black-Scholes Option Pricing Algorithm in R
+# Implements the Black-Scholes-Merton model for European option pricing
+# Features: Call/Put pricing, Greeks calculation, and implied volatility estimation
+
+library(R6)
+
+#' BlackScholesCalculator Class
+#' @description R6 class for option pricing using Black-Scholes model
+#' @details Calculates option prices and Greeks for European options
+#' Assumptions:
+#' - No dividend payments
+#' - European-style options (can only be exercised at expiration)
+#' - Log-normal distribution of stock prices
+#' - Constant risk-free rate and volatility
+#' - No transaction costs or taxes
+#' - Perfectly divisible securities
+BlackScholesCalculator <- R6Class(
+ "BlackScholesCalculator",
+
+ public = list(
+ #' @description Initialize calculator with market parameters
+ #' @param r Risk-free interest rate (annualized)
+ #' @param include_checks Whether to perform parameter validation
+ initialize = function(r = 0.05, include_checks = TRUE) {
+ private$risk_free_rate <- r
+ private$validate_params <- include_checks
+ invisible(self)
+ },
+
+ #' @description Calculate call option price
+ #' @param S Current stock price
+ #' @param K Strike price
+ #' @param T Time to expiration (in years)
+ #' @param sigma Volatility (annualized)
+ calculate_call_price = function(S, K, T, sigma) {
+ if (private$validate_params) {
+ private$validate_inputs(S, K, T, sigma)
+ }
+
+ d1 <- private$calculate_d1(S, K, T, sigma)
+ d2 <- private$calculate_d2(d1, sigma, T)
+
+ call_price <- S * stats::pnorm(d1) - K * exp(-private$risk_free_rate * T) * stats::pnorm(d2)
+ return(call_price)
+ },
+
+ #' @description Calculate put option price
+ #' @param S Current stock price
+ #' @param K Strike price
+ #' @param T Time to expiration (in years)
+ #' @param sigma Volatility (annualized)
+ calculate_put_price = function(S, K, T, sigma) {
+ if (private$validate_params) {
+ private$validate_inputs(S, K, T, sigma)
+ }
+
+ d1 <- private$calculate_d1(S, K, T, sigma)
+ d2 <- private$calculate_d2(d1, sigma, T)
+
+ put_price <- K * exp(-private$risk_free_rate * T) * stats::pnorm(-d2) - S * stats::pnorm(-d1)
+ return(put_price)
+ },
+
+ #' @description Calculate all Greeks for a call option
+ #' @param S Current stock price
+ #' @param K Strike price
+ #' @param T Time to expiration (in years)
+ #' @param sigma Volatility (annualized)
+ calculate_call_greeks = function(S, K, T, sigma) {
+ if (private$validate_params) {
+ private$validate_inputs(S, K, T, sigma)
+ }
+
+ d1 <- private$calculate_d1(S, K, T, sigma)
+ d2 <- private$calculate_d2(d1, sigma, T)
+
+ # Calculate Greeks
+ delta <- stats::pnorm(d1)
+ gamma <- stats::dnorm(d1) / (S * sigma * sqrt(T))
+ theta <- (-S * stats::dnorm(d1) * sigma / (2 * sqrt(T)) -
+ private$risk_free_rate * K * exp(-private$risk_free_rate * T) * stats::pnorm(d2))
+ vega <- S * sqrt(T) * stats::dnorm(d1)
+ rho <- K * T * exp(-private$risk_free_rate * T) * stats::pnorm(d2)
+
+ return(list(
+ delta = delta,
+ gamma = gamma,
+ theta = theta,
+ vega = vega,
+ rho = rho
+ ))
+ },
+
+ #' @description Calculate all Greeks for a put option
+ #' @param S Current stock price
+ #' @param K Strike price
+ #' @param T Time to expiration (in years)
+ #' @param sigma Volatility (annualized)
+ calculate_put_greeks = function(S, K, T, sigma) {
+ if (private$validate_params) {
+ private$validate_inputs(S, K, T, sigma)
+ }
+
+ d1 <- private$calculate_d1(S, K, T, sigma)
+ d2 <- private$calculate_d2(d1, sigma, T)
+
+ # Calculate Greeks
+ delta <- stats::pnorm(d1) - 1
+ gamma <- stats::dnorm(d1) / (S * sigma * sqrt(T))
+ theta <- (-S * stats::dnorm(d1) * sigma / (2 * sqrt(T)) +
+ private$risk_free_rate * K * exp(-private$risk_free_rate * T) * stats::pnorm(-d2))
+ vega <- S * sqrt(T) * stats::dnorm(d1)
+ rho <- -K * T * exp(-private$risk_free_rate * T) * stats::pnorm(-d2)
+
+ return(list(
+ delta = delta,
+ gamma = gamma,
+ theta = theta,
+ vega = vega,
+ rho = rho
+ ))
+ },
+
+ #' @description Estimate implied volatility using Newton-Raphson method
+ #' @param market_price Observed market price of the option
+ #' @param S Current stock price
+ #' @param K Strike price
+ #' @param T Time to expiration (in years)
+ #' @param is_call Whether the option is a call (TRUE) or put (FALSE)
+ #' @param tolerance Convergence tolerance
+ #' @param max_iter Maximum iterations
+ estimate_implied_volatility = function(market_price, S, K, T,
+ is_call = TRUE, tolerance = 1e-5, max_iter = 100) {
+ if (private$validate_params) {
+ if (market_price <= 0) stop("Market price must be positive")
+ private$validate_inputs(S, K, T, 0.5) # Initial volatility check
+ }
+
+ # Initial guess for volatility
+ sigma <- sqrt(2 * abs(log(S/K) + private$risk_free_rate * T) / T)
+ sigma <- min(max(0.1, sigma), 5) # Bound initial guess
+
+ for (i in 1:max_iter) {
+ # Calculate price and vega
+ if (is_call) {
+ price <- self$calculate_call_price(S, K, T, sigma)
+ greeks <- self$calculate_call_greeks(S, K, T, sigma)
+ } else {
+ price <- self$calculate_put_price(S, K, T, sigma)
+ greeks <- self$calculate_put_greeks(S, K, T, sigma)
+ }
+
+ diff <- price - market_price
+
+ if (abs(diff) < tolerance) {
+ return(sigma)
+ }
+
+ # Update volatility estimate using Newton-Raphson
+ sigma <- sigma - diff / greeks$vega
+
+ # Bound the volatility
+ sigma <- min(max(0.001, sigma), 5)
+ }
+
+ warning("Implied volatility did not converge")
+ return(sigma)
+ }
+ ),
+
+ private = list(
+ risk_free_rate = NULL,
+ validate_params = NULL,
+
+ calculate_d1 = function(S, K, T, sigma) {
+ (log(S/K) + (private$risk_free_rate + sigma^2/2) * T) / (sigma * sqrt(T))
+ },
+
+ calculate_d2 = function(d1, sigma, T) {
+ d1 - sigma * sqrt(T)
+ },
+
+ validate_inputs = function(S, K, T, sigma) {
+ if (S <= 0) stop("Stock price must be positive")
+ if (K <= 0) stop("Strike price must be positive")
+ if (T <= 0) stop("Time to expiration must be positive")
+ if (sigma <= 0) stop("Volatility must be positive")
+ }
+ )
+)
+
+# Demonstration
+demonstrate_black_scholes <- function() {
+ cat("=== Black-Scholes Option Pricing Demo ===\n\n")
+
+  # Initialize calculator (keep the rate in a local variable so it can be
+  # reported later; the field inside the class is private)
+  r <- 0.05
+  bs <- BlackScholesCalculator$new(r = r)
+
+ # Example parameters
+ S <- 100 # Current stock price
+ K <- 100 # Strike price
+ T <- 1 # One year to expiration
+ sigma <- 0.2 # 20% volatility
+
+ # Calculate option prices
+ call_price <- bs$calculate_call_price(S, K, T, sigma)
+ put_price <- bs$calculate_put_price(S, K, T, sigma)
+
+ cat("Parameters:\n")
+ cat(sprintf("Stock Price: $%.2f\n", S))
+ cat(sprintf("Strike Price: $%.2f\n", K))
+ cat(sprintf("Time to Expiration: %.1f years\n", T))
+ cat(sprintf("Volatility: %.1f%%\n", sigma * 100))
+  cat(sprintf("Risk-free Rate: %.1f%%\n\n", r * 100))
+
+ cat("Option Prices:\n")
+ cat(sprintf("Call Option: $%.2f\n", call_price))
+ cat(sprintf("Put Option: $%.2f\n\n", put_price))
+
+ # Calculate and display Greeks
+ call_greeks <- bs$calculate_call_greeks(S, K, T, sigma)
+ put_greeks <- bs$calculate_put_greeks(S, K, T, sigma)
+
+ cat("Call Option Greeks:\n")
+ cat(sprintf("Delta: %.4f\n", call_greeks$delta))
+ cat(sprintf("Gamma: %.4f\n", call_greeks$gamma))
+ cat(sprintf("Theta: %.4f\n", call_greeks$theta))
+ cat(sprintf("Vega: %.4f\n", call_greeks$vega))
+ cat(sprintf("Rho: %.4f\n\n", call_greeks$rho))
+
+ cat("Put Option Greeks:\n")
+ cat(sprintf("Delta: %.4f\n", put_greeks$delta))
+ cat(sprintf("Gamma: %.4f\n", put_greeks$gamma))
+ cat(sprintf("Theta: %.4f\n", put_greeks$theta))
+ cat(sprintf("Vega: %.4f\n", put_greeks$vega))
+ cat(sprintf("Rho: %.4f\n\n", put_greeks$rho))
+
+ # Demonstrate implied volatility calculation
+ test_market_price <- call_price * 1.1 # Use 10% higher price for demonstration
+ implied_vol <- bs$estimate_implied_volatility(test_market_price, S, K, T, is_call = TRUE)
+ cat("Implied Volatility Estimation:\n")
+ cat(sprintf("Market Price: $%.2f\n", test_market_price))
+ cat(sprintf("Implied Volatility: %.1f%%\n", implied_vol * 100))
+
+ cat("\n=== Demo Complete ===\n")
+}
+
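+# Optional sanity check (illustrative): European put-call parity,
+# C - P = S - K * exp(-r * T), should hold up to numerical precision.
+# bs_check <- BlackScholesCalculator$new(r = 0.05)
+# c0 <- bs_check$calculate_call_price(100, 100, 1, 0.2)
+# p0 <- bs_check$calculate_put_price(100, 100, 1, 0.2)
+# all.equal(c0 - p0, 100 - 100 * exp(-0.05 * 1))
+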
+# Run demonstration if not in interactive mode
+if (!interactive()) {
+ demonstrate_black_scholes()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/quantitative_finance/kalman_filter.r b/Desktop/open-source/R/quantitative_finance/kalman_filter.r
new file mode 100644
index 00000000..409a424d
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/kalman_filter.r
@@ -0,0 +1,32 @@
+# Kalman filter applied to a simulated noisy price series
+set.seed(123)
+num_obs <- 100
+true_returns <- rnorm(num_obs, mean = 0.001, sd = 0.01)
+observed_prices <- cumprod(1 + true_returns) * 100
+noise <- rnorm(num_obs, mean = 0, sd = 0.1)
+noisy_prices <- observed_prices + noise
+# Kalman filter implementation
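+# State-space model assumed by this filter (local linear trend form):
+#   state x_t = (level_t, trend_t)
+#   transition:  x_t = A x_{t-1} + w_t,  A = [[1, 1], [0, 1]],  w_t ~ N(0, Q)
+#   observation: z_t = H x_t + v_t,      H = [1, 0],            v_t ~ N(0, R)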
+kalman_filter <- function(observed_prices) {
+ state <- c(observed_prices[1], 0)
+ P <- matrix(c(1, 0, 0, 1), nrow = 2)
+ Q <- matrix(c(0.0001, 0, 0, 0.0001), nrow = 2)
+ R <- 0.1
+ A <- matrix(c(1, 1, 0, 1), nrow = 2)
+ H <- matrix(c(1, 0), nrow = 1)
+ filtered_states <- matrix(0, nrow = length(observed_prices), ncol = 2)
+ for (i in 1:length(observed_prices)) {
+ state_pred <- A %*% state
+ P_pred <- A %*% P %*% t(A) + Q
+ K <- P_pred %*% t(H) %*% solve(H %*% P_pred %*% t(H) + R)
+ state <- state_pred + K %*% (observed_prices[i] - H %*% state_pred)
+    P <- (diag(2) - K %*% H) %*% P_pred # covariance update: (I - K H) P_pred
+ filtered_states[i, ] <- state
+ }
+ return(list(filtered_states = filtered_states, state_pred = state_pred, P_pred = P_pred))
+}
+result <- kalman_filter(noisy_prices)
+plot(noisy_prices, type = "l", col = "blue", lwd = 2, main = "Kalman Filter",
+     xlab = "Time", ylab = "Price")
+lines(result$filtered_states[, 1], type = "l", col = "red", lwd = 2)
+lines(observed_prices, type = "l", col = "green", lwd = 2)
+legend("topright", legend = c("Noisy Prices", "Filtered Prices", "True Prices"),
+       col = c("blue", "red", "green"), lty = 1, lwd = 2)
diff --git a/Desktop/open-source/R/quantitative_finance/markowitz_portfolio_optimization.r b/Desktop/open-source/R/quantitative_finance/markowitz_portfolio_optimization.r
new file mode 100644
index 00000000..5e659266
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/markowitz_portfolio_optimization.r
@@ -0,0 +1,28 @@
+# Required library
+library(quadprog)
+# Set a seed for reproducibility
+set.seed(123)
+# Generate random data for three assets
+num_assets <- 3
+num_obs <- 100
+returns <- matrix(rnorm(num_assets * num_obs), ncol = num_assets)
+# Define the objective function for portfolio optimization
+objective_function <- function(weights, cov_matrix) {
+ portfolio_return <- sum(weights * colMeans(returns))
+ portfolio_volatility <- sqrt(t(weights) %*% cov_matrix %*% weights)
+ return(c(portfolio_return, portfolio_volatility))
+}
+cov_matrix <- cov(returns)
+# Minimize portfolio variance subject to:
+#   (1) full investment: weights sum to 1 (equality constraint, meq = 1)
+#   (2) expected portfolio return of at least 0.05 (inequality constraint)
+constraints <- matrix(0, nrow = 2, ncol = num_assets)
+constraints[1, ] <- 1
+constraints[2, ] <- colMeans(returns)
+optimal_weights <- solve.QP(Dmat = 2 * cov_matrix,
+  dvec = rep(0, num_assets),
+  Amat = t(constraints),
+  bvec = c(1, 0.05),
+  meq = 1)$solution
+cat("Optimal Weights:", optimal_weights, "\n")
+optimal_portfolio <- objective_function(optimal_weights, cov_matrix)
+cat("Expected Return:", optimal_portfolio[1], "\n")
+cat("Volatility:", optimal_portfolio[2], "\n")
diff --git a/Desktop/open-source/R/quantitative_finance/monte_carlo_simulation.r b/Desktop/open-source/R/quantitative_finance/monte_carlo_simulation.r
new file mode 100644
index 00000000..c0c3b69d
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/monte_carlo_simulation.r
@@ -0,0 +1,31 @@
+# Monte Carlo pricing of a European call option under geometric Brownian motion
+# (uses only base R)
+# Parameters
+S0 <- 100 # Initial stock price
+K <- 100 # Strike price
+r <- 0.05 # Risk-free rate
+sigma <- 0.2 # Volatility
+T <- 1 # Time to maturity (in years)
+n <- 252 # Number of trading days
+# Function to simulate stock prices using geometric Brownian motion
+simulate_stock_prices <- function(S0, r, sigma, T, n) {
+ dt <- T/n
+ t <- seq(0, T, by = dt)
+ W <- c(0, cumsum(sqrt(dt) * rnorm(n)))
+ S <- S0 * exp((r - 0.5 * sigma^2) * t + sigma * W)
+ return(S)
+}
+# Function to calculate option price using Monte Carlo simulation
+monte_carlo_option_price <- function(S0, K, r, sigma, T, n, num_simulations) {
+ option_prices <- numeric(num_simulations)
+ for (i in 1:num_simulations) {
+ ST <- simulate_stock_prices(S0, r, sigma, T, n)[n + 1] # Final stock price
+ option_prices[i] <- pmax(ST - K, 0) # Payoff of the option
+ }
+ option_price <- mean(option_prices) * exp(-r * T) # Discounted expected payoff
+ return(option_price)
+}
+# Number of Monte Carlo simulations
+num_simulations <- 10000
+option_price <- monte_carlo_option_price(S0, K, r, sigma, T, n, num_simulations)
+cat("Option price:", option_price, "\n")
diff --git a/Desktop/open-source/R/quantitative_finance/risk_metrics.r b/Desktop/open-source/R/quantitative_finance/risk_metrics.r
new file mode 100644
index 00000000..d09b8fc0
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/risk_metrics.r
@@ -0,0 +1,267 @@
+# Value at Risk (VaR) and Expected Shortfall (ES) Calculator
+# Implements multiple VaR calculation methods and Expected Shortfall
+# Features: Historical, Parametric, and Monte Carlo VaR/ES calculations
+
+library(R6)
+
+#' RiskMetrics Class
+#' @description R6 class for calculating Value at Risk and Expected Shortfall
+#' @details Implements multiple methods for VaR and ES calculation:
+#' - Historical simulation (non-parametric)
+#' - Parametric (variance-covariance)
+#' - Monte Carlo simulation
+#' Time complexity varies by method, documented per method
+RiskMetrics <- R6Class(
+ "RiskMetrics",
+
+ public = list(
+ #' @description Initialize risk calculator
+ #' @param returns Historical returns data
+ #' @param confidence_level Confidence level for VaR/ES (default 0.95)
+ #' @param time_horizon Time horizon in days (default 1)
+ initialize = function(returns = NULL, confidence_level = 0.95, time_horizon = 1) {
+ private$validate_parameters(confidence_level, time_horizon)
+
+ self$returns <- returns
+ self$confidence_level <- confidence_level
+ self$time_horizon <- time_horizon
+
+ if (!is.null(returns)) {
+ private$fit_distribution()
+ }
+ },
+
+ #' @description Calculate Historical VaR
+ #' @param portfolio_value Current portfolio value
+ #' @param method Calculation method ('historical', 'parametric', or 'monte_carlo')
+ #' @param n_simulations Number of simulations for Monte Carlo method
+ calculate_var = function(portfolio_value, method = "historical", n_simulations = 10000) {
+ if (is.null(self$returns)) {
+ stop("No returns data available. Please initialize with returns data.")
+ }
+
+ method <- match.arg(method, c("historical", "parametric", "monte_carlo"))
+
+ var_value <- switch(method,
+ "historical" = private$calculate_historical_var(portfolio_value),
+ "parametric" = private$calculate_parametric_var(portfolio_value),
+ "monte_carlo" = private$calculate_monte_carlo_var(portfolio_value, n_simulations)
+ )
+
+ # Scale VaR to time horizon
+ var_value * sqrt(self$time_horizon)
+ },
+
+ #' @description Calculate Expected Shortfall (Conditional VaR)
+ #' @param portfolio_value Current portfolio value
+ #' @param method Calculation method ('historical', 'parametric', or 'monte_carlo')
+ #' @param n_simulations Number of simulations for Monte Carlo method
+ calculate_es = function(portfolio_value, method = "historical", n_simulations = 10000) {
+ if (is.null(self$returns)) {
+ stop("No returns data available. Please initialize with returns data.")
+ }
+
+ method <- match.arg(method, c("historical", "parametric", "monte_carlo"))
+
+ es_value <- switch(method,
+ "historical" = private$calculate_historical_es(portfolio_value),
+ "parametric" = private$calculate_parametric_es(portfolio_value),
+ "monte_carlo" = private$calculate_monte_carlo_es(portfolio_value, n_simulations)
+ )
+
+ # Scale ES to time horizon
+ es_value * sqrt(self$time_horizon)
+ },
+
+ #' @description Generate risk report with multiple metrics
+ #' @param portfolio_value Current portfolio value
+ #' @param include_methods Which methods to include in report
+ generate_risk_report = function(portfolio_value,
+ include_methods = c("historical", "parametric", "monte_carlo")) {
+ results <- list()
+
+ for (method in include_methods) {
+ results[[method]] <- list(
+ var = self$calculate_var(portfolio_value, method),
+ es = self$calculate_es(portfolio_value, method)
+ )
+ }
+
+ # Add distribution statistics
+ results$statistics <- list(
+ mean_return = mean(self$returns),
+ volatility = sd(self$returns),
+ skewness = private$calculate_skewness(),
+ kurtosis = private$calculate_kurtosis()
+ )
+
+ return(results)
+ },
+
+ #' @description Update returns data and recalculate distribution parameters
+ #' @param new_returns New returns data to use
+ update_returns = function(new_returns) {
+ self$returns <- new_returns
+ private$fit_distribution()
+ invisible(self)
+ },
+
+ # Public fields
+ returns = NULL,
+ confidence_level = NULL,
+ time_horizon = NULL
+ ),
+
+ private = list(
+ # Distribution parameters
+ mean_return = NULL,
+ volatility = NULL,
+
+ #' @description Fit distribution to returns data
+ fit_distribution = function() {
+ private$mean_return <- mean(self$returns)
+ private$volatility <- sd(self$returns)
+ },
+
+ #' @description Calculate Historical VaR
+ calculate_historical_var = function(portfolio_value) {
+ sorted_returns <- sort(self$returns)
+ index <- floor((1 - self$confidence_level) * length(sorted_returns))
+ -sorted_returns[index] * portfolio_value
+ },
+
+ #' @description Calculate Parametric VaR
+ calculate_parametric_var = function(portfolio_value) {
+ z_score <- stats::qnorm(self$confidence_level)
+ portfolio_value * (z_score * private$volatility - private$mean_return)
+ },
+
+ #' @description Calculate Monte Carlo VaR
+ calculate_monte_carlo_var = function(portfolio_value, n_simulations) {
+ simulated_returns <- stats::rnorm(
+ n_simulations,
+ mean = private$mean_return,
+ sd = private$volatility
+ )
+ sorted_returns <- sort(simulated_returns)
+ index <- floor((1 - self$confidence_level) * n_simulations)
+ -sorted_returns[index] * portfolio_value
+ },
+
+ #' @description Calculate Historical Expected Shortfall
+ calculate_historical_es = function(portfolio_value) {
+ sorted_returns <- sort(self$returns)
+ var_index <- floor((1 - self$confidence_level) * length(sorted_returns))
+ tail_returns <- sorted_returns[1:var_index]
+ -mean(tail_returns) * portfolio_value
+ },
+
+ #' @description Calculate Parametric Expected Shortfall
+ calculate_parametric_es = function(portfolio_value) {
+ z_score <- stats::qnorm(self$confidence_level)
+ phi_z <- stats::dnorm(z_score)
+ lambda <- phi_z / (1 - self$confidence_level)
+ portfolio_value * (lambda * private$volatility - private$mean_return)
+ },
+
+ #' @description Calculate Monte Carlo Expected Shortfall
+ calculate_monte_carlo_es = function(portfolio_value, n_simulations) {
+ simulated_returns <- stats::rnorm(
+ n_simulations,
+ mean = private$mean_return,
+ sd = private$volatility
+ )
+ sorted_returns <- sort(simulated_returns)
+ var_index <- floor((1 - self$confidence_level) * n_simulations)
+ tail_returns <- sorted_returns[1:var_index]
+ -mean(tail_returns) * portfolio_value
+ },
+
+ #' @description Calculate distribution skewness
+ calculate_skewness = function() {
+ r <- self$returns
+ n <- length(r)
+ m3 <- sum((r - mean(r))^3) / n
+ s3 <- sd(r)^3
+ m3 / s3
+ },
+
+ #' @description Calculate distribution kurtosis
+ calculate_kurtosis = function() {
+ r <- self$returns
+ n <- length(r)
+ m4 <- sum((r - mean(r))^4) / n
+ s4 <- sd(r)^4
+ m4 / s4 - 3 # Excess kurtosis (normal = 0)
+ },
+
+ #' @description Validate input parameters
+ validate_parameters = function(confidence_level, time_horizon) {
+ if (confidence_level <= 0 || confidence_level >= 1) {
+ stop("Confidence level must be between 0 and 1")
+ }
+ if (time_horizon <= 0) {
+ stop("Time horizon must be positive")
+ }
+ }
+ )
+)
+
+# Demonstration
+demonstrate_risk_metrics <- function() {
+ cat("=== Value at Risk and Expected Shortfall Demo ===\n\n")
+
+ # Generate sample returns data
+ set.seed(42)
+ n_days <- 1000
+ returns <- rnorm(n_days, mean = 0.0001, sd = 0.01)
+
+ # Initialize calculator
+ risk_calc <- RiskMetrics$new(
+ returns = returns,
+ confidence_level = 0.95,
+ time_horizon = 1
+ )
+
+ # Portfolio parameters
+ portfolio_value <- 1000000 # $1 million portfolio
+
+ cat("Portfolio Parameters:\n")
+ cat(sprintf("Value: $%d\n", portfolio_value))
+ cat(sprintf("Confidence Level: %.1f%%\n", risk_calc$confidence_level * 100))
+ cat(sprintf("Time Horizon: %d day(s)\n\n", risk_calc$time_horizon))
+
+ # Calculate VaR using different methods
+ methods <- c("historical", "parametric", "monte_carlo")
+
+ cat("Value at Risk (VaR) Results:\n")
+ for (method in methods) {
+ var_value <- risk_calc$calculate_var(portfolio_value, method)
+ cat(sprintf("%s VaR: $%.2f\n", tools::toTitleCase(method), var_value))
+ }
+ cat("\n")
+
+ cat("Expected Shortfall (ES) Results:\n")
+ for (method in methods) {
+ es_value <- risk_calc$calculate_es(portfolio_value, method)
+ cat(sprintf("%s ES: $%.2f\n", tools::toTitleCase(method), es_value))
+ }
+ cat("\n")
+
+ # Generate and display comprehensive risk report
+ cat("Comprehensive Risk Report:\n")
+ report <- risk_calc$generate_risk_report(portfolio_value)
+
+ cat("\nDistribution Statistics:\n")
+ cat(sprintf("Mean Return: %.6f\n", report$statistics$mean_return))
+ cat(sprintf("Volatility: %.6f\n", report$statistics$volatility))
+ cat(sprintf("Skewness: %.6f\n", report$statistics$skewness))
+ cat(sprintf("Excess Kurtosis: %.6f\n", report$statistics$kurtosis))
+
+ cat("\n=== Demo Complete ===\n")
+}
+
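+# Quick illustrative usage outside the demo (the parameter values below are
+# arbitrary examples, not recommendations):
+# rm99 <- RiskMetrics$new(returns = rnorm(500, 0, 0.02), confidence_level = 0.99)
+# rm99$calculate_var(portfolio_value = 250000, method = "parametric")
+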
+# Run demonstration if not in interactive mode
+if (!interactive()) {
+ demonstrate_risk_metrics()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/quantitative_finance/time_series_analyzer.r b/Desktop/open-source/R/quantitative_finance/time_series_analyzer.r
new file mode 100644
index 00000000..96a5e01c
--- /dev/null
+++ b/Desktop/open-source/R/quantitative_finance/time_series_analyzer.r
@@ -0,0 +1,524 @@
+# Time Series Analysis and ARIMA Modeling
+# Implements comprehensive time series analysis with ARIMA models
+# Features: Stationarity testing, model selection, forecasting, and diagnostics
+
+library(R6)
+
+#' TimeSeriesAnalyzer Class
+#' @description R6 class for time series analysis and ARIMA modeling
+#' @details Provides functionality for:
+#' - Stationarity testing (ADF test)
+#' - ACF/PACF analysis
+#' - ARIMA model fitting
+#' - Model selection using AIC/BIC
+#' - Forecasting with confidence intervals
+TimeSeriesAnalyzer <- R6Class(
+ "TimeSeriesAnalyzer",
+
+ public = list(
+ #' @description Initialize analyzer with time series data
+ #' @param data Time series data (numeric vector)
+ #' @param frequency Frequency of the time series (default: 1)
+ initialize = function(data = NULL, frequency = 1) {
+ if (!is.null(data)) {
+ private$validate_input(data)
+ self$data <- data
+ self$frequency <- frequency
+ private$n <- length(data)
+ }
+ invisible(self)
+ },
+
+ #' @description Test for stationarity using Augmented Dickey-Fuller test
+ #' @param max_lags Maximum number of lags to consider
+ test_stationarity = function(max_lags = NULL) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+ if (is.null(max_lags)) {
+ max_lags <- floor(sqrt(private$n))
+ }
+
+ # Perform ADF test
+ result <- private$adf_test(max_lags)
+
+ # Store results
+ private$stationarity_results <- result
+ return(result)
+ },
+
+ #' @description Calculate ACF and PACF
+ #' @param max_lag Maximum lag to consider
+ calculate_acf_pacf = function(max_lag = NULL) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+ if (is.null(max_lag)) {
+ max_lag <- min(private$n - 1, floor(10 * log10(private$n)))
+ }
+
+ # Calculate ACF
+ acf_result <- private$calculate_acf(max_lag)
+
+ # Calculate PACF
+ pacf_result <- private$calculate_pacf(max_lag)
+
+ return(list(
+ acf = acf_result,
+ pacf = pacf_result,
+ lags = 1:max_lag
+ ))
+ },
+
+ #' @description Fit ARIMA model to the data
+ #' @param p AR order
+ #' @param d Differencing order
+ #' @param q MA order
+ fit_arima = function(p = 1, d = 0, q = 1) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+ # Validate parameters
+ if (any(c(p, d, q) < 0)) {
+ stop("ARIMA orders must be non-negative")
+ }
+
+ # Fit ARIMA model
+ model <- private$fit_arima_model(p, d, q)
+ private$current_model <- model
+
+ return(model)
+ },
+
+ #' @description Automatic model selection using AIC
+ #' @param max_p Maximum AR order to consider
+ #' @param max_d Maximum differencing order
+ #' @param max_q Maximum MA order
+ select_best_model = function(max_p = 3, max_d = 2, max_q = 3) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+ best_aic <- Inf
+ best_order <- c(0, 0, 0)
+
+ # Grid search over possible orders
+ for (p in 0:max_p) {
+ for (d in 0:max_d) {
+ for (q in 0:max_q) {
+ tryCatch({
+ model <- self$fit_arima(p, d, q)
+ if (model$aic < best_aic) {
+ best_aic <- model$aic
+ best_order <- c(p, d, q)
+ private$best_model <- model
+ }
+ }, error = function(e) {
+ # Skip failed models
+ })
+ }
+ }
+ }
+
+      # Make the best model the active model for forecasting and diagnostics
+      if (!is.null(private$best_model)) {
+        private$current_model <- private$best_model
+      }
+
+      return(list(
+ order = best_order,
+ aic = best_aic
+ ))
+ },
+
+ #' @description Generate forecasts with confidence intervals
+ #' @param h Forecast horizon
+ #' @param level Confidence level (0-1)
+ forecast = function(h = 10, level = 0.95) {
+ if (is.null(private$current_model)) {
+ stop("No model fitted. Please fit a model first.")
+ }
+
+ # Generate forecasts
+ forecasts <- private$generate_forecasts(h, level)
+ return(forecasts)
+ },
+
+ #' @description Perform model diagnostics
+ diagnose_model = function() {
+ if (is.null(private$current_model)) {
+ stop("No model fitted. Please fit a model first.")
+ }
+
+ residuals <- private$current_model$residuals
+
+ # Calculate diagnostic statistics
+ diagnostics <- list(
+ residual_mean = mean(residuals),
+ residual_sd = sd(residuals),
+ ljung_box = private$ljung_box_test(residuals),
+ normality = private$normality_test(residuals),
+ arch_effect = private$arch_test(residuals)
+ )
+
+ return(diagnostics)
+ },
+
+ # Public fields
+ data = NULL,
+ frequency = NULL
+ ),
+
+ private = list(
+ n = NULL,
+ current_model = NULL,
+ best_model = NULL,
+ stationarity_results = NULL,
+
+ validate_input = function(data) {
+ if (!is.numeric(data)) {
+ stop("Input data must be numeric")
+ }
+ if (any(is.na(data))) {
+ stop("Input data contains missing values")
+ }
+ if (length(data) < 3) {
+ stop("Input data must have at least 3 observations")
+ }
+ },
+
+ adf_test = function(max_lags) {
+ y <- self$data
+ n <- length(y)
+
+ # Calculate first differences
+ dy <- diff(y)
+ y_1 <- y[-n]
+
+ # Construct regression matrix
+ X <- matrix(1, n-1, 1)
+ X <- cbind(X, y_1)
+
+ # Add lagged differences
+ if (max_lags > 0) {
+ # Add lagged differences using embed() for clarity
+ lagged_dy <- embed(dy, max_lags + 1)[, -1, drop = FALSE]
+ X <- X[(max_lags+1):nrow(X), ] # Align X with lagged_dy rows
+ X <- cbind(X, lagged_dy)
+ dy <- dy[(max_lags+1):length(dy)]
+ }
+
+ # Remove NA rows
+ complete_cases <- stats::complete.cases(X)
+ X <- X[complete_cases, ]
+ dy <- dy[complete_cases]
+
+ # Fit regression
+ fit <- stats::lm(dy ~ X - 1)
+
+ # Calculate test statistic
+ coef <- stats::coef(fit)[2]
+ se <- sqrt(diag(stats::vcov(fit)))[2]
+ t_stat <- coef / se
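+      # The fitted regression is
+      #   diff(y)_t = alpha + beta * y_{t-1} + sum_i gamma_i * diff(y)_{t-i} + e_t
+      # and t_stat is the t-ratio of beta; values below the critical value
+      # (e.g. -2.86 at the 5% level) reject the unit-root null, i.e. suggest stationarity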
+
+ # Critical values (approximate)
+ crit_values <- c(
+ "1%" = -3.43,
+ "5%" = -2.86,
+ "10%" = -2.57
+ )
+
+ return(list(
+ statistic = t_stat,
+ critical_values = crit_values,
+ is_stationary = t_stat < -2.86 # 5% level
+ ))
+ },
+
+ calculate_acf = function(max_lag) {
+ y <- scale(self$data) # Standardize data
+ n <- length(y)
+ acf <- numeric(max_lag)
+
+ for (k in 1:max_lag) {
+ acf[k] <- stats::cor(y[1:(n-k)], y[(k+1):n])
+ }
+
+ return(acf)
+ },
+
+ calculate_pacf = function(max_lag) {
+ acf <- private$calculate_acf(max_lag)
+ pacf <- numeric(max_lag)
+
+      # Solve the Yule-Walker equations at each order k: the PACF at lag k is the
+      # k-th coefficient of the best linear predictor of y_t from its first k lags
+      for (k in 1:max_lag) {
+        if (k == 1) {
+          pacf[k] <- acf[1]
+        } else {
+          # Autocorrelation matrix with lag 0 (= 1) on the diagonal
+          r <- stats::toeplitz(c(1, acf[1:(k - 1)]))
+          b <- acf[1:k]
+          pacf[k] <- solve(r, b)[k]
+        }
+ }
+
+ return(pacf)
+ },
+
+ fit_arima_model = function(p, d, q) {
+ # Difference data
+ y <- self$data
+      for (i in seq_len(d)) {
+        y <- diff(y)
+      }
+
+      # Estimate the AR part from the Yule-Walker equations:
+      #   toeplitz(c(1, rho_1, ..., rho_{p-1})) %*% phi = (rho_1, ..., rho_p)
+      if (p > 0) {
+        r <- private$calculate_acf(p)
+        phi <- solve(stats::toeplitz(c(1, r[seq_len(p - 1)])), r[1:p])
+ } else {
+ phi <- numeric(0)
+ }
+
+      # Approximate the MA parameters with a simple fixed-point iteration on the
+      # autocorrelations (a rough moment-based estimate, not maximum likelihood)
+      if (q > 0) {
+        theta <- numeric(q)
+ for (i in 1:10) { # Iterate to improve estimates
+ r <- private$calculate_acf(q)
+ psi <- numeric(q)
+ for (j in 1:q) {
+ if (j > 1) {
+ psi[j] <- sum(theta[1:(j-1)] * rev(psi[1:(j-1)])) + theta[j]
+ } else {
+ psi[j] <- theta[j]
+ }
+ }
+ if (q == 1) {
+ theta <- solve(stats::toeplitz(1), r[1])
+ } else {
+ theta <- solve(stats::toeplitz(c(1, psi[1:(q-1)])), r[1:q])
+ }
+ }
+ } else {
+ theta <- numeric(0)
+ }
+
+ # Calculate residuals and AIC
+ resid <- private$calculate_residuals(y, phi, theta)
+ n_params <- p + q
+ aic <- length(resid) * log(var(resid)) + 2 * n_params
+
+ return(list(
+ coefficients = list(ar = phi, ma = theta),
+ residuals = resid,
+ aic = aic,
+ order = c(p, d, q)
+ ))
+ },
+
+ calculate_residuals = function(y, phi, theta) {
+ n <- length(y)
+ p <- length(phi)
+ q <- length(theta)
+ resid <- numeric(n)
+
+ for (t in (max(p,q)+1):n) {
+ pred <- 0
+ if (p > 0) {
+ pred <- pred + sum(phi * y[(t-1):(t-p)])
+ }
+ if (q > 0) {
+ pred <- pred + sum(theta * resid[(t-1):(t-q)])
+ }
+ resid[t] <- y[t] - pred
+ }
+
+ return(resid[(max(p,q)+1):n])
+ },
+
+ generate_forecasts = function(h, level) {
+ model <- private$current_model
+ y <- self$data
+ n <- length(y)
+
+ # Get model orders
+ p <- length(model$coefficients$ar)
+ d <- model$order[2]
+ q <- length(model$coefficients$ma)
+
+      # Generate point forecasts from the AR part of the fitted model
+      # (MA terms and differencing are ignored in this simplified recursion)
+      forecasts <- numeric(h)
+      for (i in 1:h) {
+        pred <- 0
+        if (p > 0) {
+          # The p most recent values: observed series followed by the forecasts made
+          # so far, ordered newest-first so that ar[1] multiplies the latest value
+          history <- c(y, forecasts[seq_len(i - 1)])
+          ar_terms <- rev(utils::tail(history, p))
+          pred <- pred + sum(model$coefficients$ar * ar_terms)
+        }
+        forecasts[i] <- pred
+      }
+
+ # Calculate prediction intervals
+ sigma <- sd(model$residuals)
+ z <- stats::qnorm((1 + level) / 2)
+ se <- sigma * sqrt(cumsum(rep(1, h)))
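+      # These intervals widen with sqrt(horizon), which is exact for a random-walk
+      # forecast; intervals for a general ARMA fit would use its psi-weights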
+ lower <- forecasts - z * se
+ upper <- forecasts + z * se
+
+ return(list(
+ mean = forecasts,
+ lower = lower,
+ upper = upper,
+ level = level
+ ))
+ },
+
+ ljung_box_test = function(residuals) {
+ max_lag <- min(20, length(residuals) - 1)
+ acf_vals <- stats::acf(residuals, plot = FALSE, lag.max = max_lag)$acf[-1]
+ n <- length(residuals)
+
+ Q <- n * (n + 2) * sum((acf_vals^2) / (n - 1:max_lag))
+ p_value <- 1 - stats::pchisq(Q, max_lag)
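+      # Under H0 (no autocorrelation up to max_lag) Q is approximately
+      # chi-squared with max_lag degrees of freedom; a small p-value indicates
+      # remaining autocorrelation in the residuals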
+
+ return(list(
+ statistic = Q,
+ p_value = p_value
+ ))
+ },
+
+ normality_test = function(residuals) {
+ # Jarque-Bera test
+ n <- length(residuals)
+ s <- sum((residuals - mean(residuals))^3) / (n * sd(residuals)^3) # skewness
+ k <- sum((residuals - mean(residuals))^4) / (n * sd(residuals)^4) - 3 # excess kurtosis
+ JB <- n * (s^2/6 + k^2/24)
+ p_value <- 1 - stats::pchisq(JB, df = 2)
+
+ return(list(
+ statistic = JB,
+ p_value = p_value
+ ))
+ },
+
+ arch_test = function(residuals) {
+ # ARCH LM test
+ sq_resid <- residuals^2
+ n <- length(sq_resid)
+ lags <- 5
+
+ # Use embed to construct lagged matrix (current and past lags)
+ # embed(sq_resid, lags + 1) returns a matrix with columns: t, t-1, ..., t-lags
+ if (n <= lags) {
+ stop("Not enough observations for ARCH test lags.")
+ }
+ lagged_mat <- stats::embed(sq_resid, lags + 1)
+ y <- lagged_mat[, 1] # current squared residuals
+ X <- lagged_mat[, -1] # lagged squared residuals
+ fit <- stats::lm(y ~ X)
+ R2 <- summary(fit)$r.squared
+ LM <- nrow(lagged_mat) * R2
+ p_value <- 1 - stats::pchisq(LM, lags)
+
+ return(list(
+ statistic = LM,
+ p_value = p_value
+ ))
+ }
+ )
+)
+
+# Demonstration
+demonstrate_time_series_analysis <- function() {
+ cat("=== Time Series Analysis Demo ===\n\n")
+
+ # Generate sample time series
+ set.seed(42)
+ n <- 500
+
+ # AR(1) process with trend and seasonality
+ t <- 1:n
+ trend <- 0.01 * t
+ seasonal <- 2 * sin(2 * pi * t / 12)
+ ar_process <- stats::arima.sim(list(ar = 0.7), n = n)
+ y <- trend + seasonal + ar_process
+
+ # Initialize analyzer
+ ts_analyzer <- TimeSeriesAnalyzer$new(y, frequency = 12)
+
+ # Test stationarity
+ cat("Testing for stationarity...\n")
+ stat_test <- ts_analyzer$test_stationarity()
+ cat(sprintf("ADF test statistic: %.3f\n", stat_test$statistic))
+ cat("Critical values:\n")
+ print(stat_test$critical_values)
+ cat(sprintf("Series is %sstationary at 5%% level\n\n",
+ ifelse(stat_test$is_stationary, "", "non-")))
+
+ # Analyze ACF/PACF
+ cat("Calculating ACF and PACF...\n")
+ corr <- ts_analyzer$calculate_acf_pacf(20)
+ cat("First 5 lags:\n")
+ cat("ACF: ")
+ cat(sprintf("%.3f ", corr$acf[1:5]))
+ cat("\nPACF: ")
+ cat(sprintf("%.3f ", corr$pacf[1:5]))
+ cat("\n\n")
+
+ # Select best model
+ cat("Selecting best ARIMA model...\n")
+ best <- ts_analyzer$select_best_model(max_p = 2, max_d = 1, max_q = 2)
+ cat(sprintf("Best model: ARIMA(%d,%d,%d)\n",
+ best$order[1], best$order[2], best$order[3]))
+ cat(sprintf("AIC: %.2f\n\n", best$aic))
+
+ # Generate forecasts
+ cat("Generating forecasts...\n")
+ h <- 12 # Forecast horizon
+ forecasts <- ts_analyzer$forecast(h = h, level = 0.95)
+
+ cat("Point forecasts for next 12 periods:\n")
+ cat(sprintf("%.2f ", forecasts$mean))
+ cat("\n\n")
+
+ # Model diagnostics
+ cat("Performing model diagnostics...\n")
+ diagnostics <- ts_analyzer$diagnose_model()
+
+ cat("Residual diagnostics:\n")
+ cat(sprintf("Mean: %.3f\n", diagnostics$residual_mean))
+ cat(sprintf("Standard deviation: %.3f\n", diagnostics$residual_sd))
+ cat(sprintf("Ljung-Box test p-value: %.3f\n", diagnostics$ljung_box$p_value))
+ cat(sprintf("Normality test p-value: %.3f\n", diagnostics$normality$p_value))
+ cat(sprintf("ARCH test p-value: %.3f\n", diagnostics$arch_effect$p_value))
+
+ cat("\n=== Demo Complete ===\n")
+}
+
+# Run demonstration if not in interactive mode
+if (!interactive()) {
+ demonstrate_time_series_analysis()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/regression_algorithms/ann.r b/Desktop/open-source/R/regression_algorithms/ann.r
new file mode 100644
index 00000000..233ac47a
--- /dev/null
+++ b/Desktop/open-source/R/regression_algorithms/ann.r
@@ -0,0 +1,14 @@
+# Artificial neural network regression with the 'neuralnet' package.
+# Predicts concrete compressive strength from its mixture components.
+# Expects a CSV file "concrete.txt" with the columns:
+# cement, slag, ash, water, superplastic, coarseagg, fineagg, age, strength.
+library(neuralnet)
+
+concrete <- read.csv(file = "concrete.txt", stringsAsFactors = FALSE) # get the data
+
+# Min-max normalization scales every column to [0, 1]
+normalize <- function(x) {
+  (x - min(x)) / (max(x) - min(x))
+}
+concrete <- as.data.frame(lapply(concrete, normalize))
+
+# Train/test split (first 773 rows for training, the rest for testing)
+concrete_train <- concrete[1:773, ]
+concrete_test <- concrete[774:1030, ]
+
+# Single hidden layer with 5 neurons
+concrete_model <- neuralnet(strength ~ cement + slag + ash + water + superplastic + coarseagg + fineagg + age,
+                            data = concrete_train, hidden = 5)
+# Evaluate: correlation between predicted and observed strength on the test set
+model_res <- compute(concrete_model, concrete_test[, 1:8])
+predicted_strength <- model_res$net.result
+actual_strength <- concrete_test$strength
+cor(predicted_strength, actual_strength)
+plot(concrete_model)
diff --git a/Desktop/open-source/R/regression_algorithms/anova_oneway.r b/Desktop/open-source/R/regression_algorithms/anova_oneway.r
new file mode 100644
index 00000000..3184d52b
--- /dev/null
+++ b/Desktop/open-source/R/regression_algorithms/anova_oneway.r
@@ -0,0 +1,81 @@
+#' One-way ANOVA (from scratch)
+#'
+#' @description
+#' Computes a one-way Analysis of Variance (ANOVA) to test whether k group means
+#' are equal. Implements sums of squares, F statistic, and p-value without using \code{aov()}.
+#'
+#' @param x Numeric vector of observations.
+#' @param g Factor or character vector of group labels (same length as \code{x}).
+#'
+#' @return A list with:
+#' \item{df_between}{Degrees of freedom between groups (k - 1).}
+#' \item{df_within}{Degrees of freedom within groups (n - k).}
+#' \item{ss_between}{Between-group sum of squares.}
+#' \item{ss_within}{Within-group sum of squares.}
+#' \item{ms_between}{Between-group mean square.}
+#' \item{ms_within}{Within-group mean square.}
+#' \item{F}{F-statistic.}
+#' \item{p_value}{Right-tail p-value from F distribution.}
+#'
+#' @details
+#' One-way ANOVA partitions total variance into between-group and within-group components:
+#' \deqn{SS_T = SS_B + SS_W.}
+#' The test statistic \eqn{F = MS_B / MS_W} follows an F distribution under H0 (equal means).
+#'
+#' @examples
+#' set.seed(0)
+#' x <- c(rnorm(20, 0, 1), rnorm(22, 0.2, 1), rnorm(18, -0.1, 1))
+#' g <- factor(rep(c("A", "B", "C"), times = c(20, 22, 18)))
+#' res <- anova_oneway(x, g)
+#' res$F; res$p_value
+#'
+#' @export
+anova_oneway <- function(x, g) {
+ if (!is.numeric(x)) stop("`x` must be numeric.")
+ if (length(x) != length(g)) stop("`x` and `g` must have the same length.")
+ if (!is.factor(g)) g <- factor(g)
+
+ n <- length(x)
+ k <- nlevels(g)
+ if (k < 2L) stop("Need at least 2 groups.")
+ if (n <= k) stop("Total observations must exceed number of groups.")
+
+ grand_mean <- mean(x)
+
+ # group stats
+ group_means <- tapply(x, g, mean)
+ group_sizes <- tapply(x, g, length)
+
+ # SS_between = sum_{groups} n_j * (mean_j - grand_mean)^2
+ ss_between <- sum(group_sizes * (group_means - grand_mean)^2)
+
+ # SS_within = sum over groups of sum (x_ij - mean_j)^2
+ ss_within <- sum(unlist(tapply(x, g, function(v) sum((v - mean(v))^2))))
+
+ df_between <- k - 1L
+ df_within <- n - k
+
+ ms_between <- ss_between / df_between
+ ms_within <- ss_within / df_within
+
+ F_stat <- ms_between / ms_within
+ p_val <- stats::pf(F_stat, df_between, df_within, lower.tail = FALSE)
+
+ list(
+ df_between = df_between,
+ df_within = df_within,
+ ss_between = ss_between,
+ ss_within = ss_within,
+ ms_between = ms_between,
+ ms_within = ms_within,
+ F = F_stat,
+ p_value = p_val
+ )
+}
+
+# --- Simple self-test (uncomment to run locally) ---
+# set.seed(1)
+# x <- c(rnorm(15, 0, 1), rnorm(15, 0.5, 1), rnorm(15, 1, 1))
+# g <- factor(rep(c("A","B","C"), each = 15))
+# res <- anova_oneway(x, g)
+# stopifnot(res$F > 0, res$p_value >= 0, res$p_value <= 1)
diff --git a/Desktop/open-source/R/regression_algorithms/linear_regression.r b/Desktop/open-source/R/regression_algorithms/linear_regression.r
new file mode 100644
index 00000000..753a0e30
--- /dev/null
+++ b/Desktop/open-source/R/regression_algorithms/linear_regression.r
@@ -0,0 +1,11 @@
+# Simple linear regression with lm()
+# The placeholder objects below stand for your own data: the features and the
+# response must be numeric vectors / data frame columns.
+x_train <- input_variables_values_training_datasets
+y_train <- target_variables_values_training_datasets
+x_test <- input_variables_values_test_datasets
+train_data <- cbind(x_train, y_train)
+# Train the model on the training set and inspect the fit
+linear <- lm(y_train ~ ., data = train_data)
+summary(linear)
+# Predict the output for the test set (x_test must have the same column names)
+predicted <- predict(linear, newdata = x_test)
diff --git a/Desktop/open-source/R/regression_algorithms/linearregressionrawr.r b/Desktop/open-source/R/regression_algorithms/linearregressionrawr.r
new file mode 100644
index 00000000..4844ad66
--- /dev/null
+++ b/Desktop/open-source/R/regression_algorithms/linearregressionrawr.r
@@ -0,0 +1,10 @@
+# Ordinary least squares from scratch via the singular value decomposition:
+# beta_hat = V %*% D^(-1) %*% t(U) %*% y, the Moore-Penrose pseudoinverse of the
+# design matrix applied to y (equivalent to solve(t(X) %*% X) %*% t(X) %*% y).
+ols <- function(y, x) {
+  design <- model.matrix(y ~ ., data = as.data.frame(x))
+  decomp <- svd(design)
+  return(decomp$v %*% diag(1 / decomp$d) %*% t(decomp$u) %*% y)
+}
+
+set.seed(1)
+x <- rnorm(1000)
+y <- 4 * x + rnorm(1000, sd = .5)
+ols(y=y,x=matrix(x, ncol = 1))
diff --git a/Desktop/open-source/R/regression_algorithms/multiple_linear_regression.r b/Desktop/open-source/R/regression_algorithms/multiple_linear_regression.r
new file mode 100644
index 00000000..68a61cd4
--- /dev/null
+++ b/Desktop/open-source/R/regression_algorithms/multiple_linear_regression.r
@@ -0,0 +1,11 @@
+# Introduction to multiple linear regression
+
+# lm stands for Linear Model
+# y_data is modeled as a*x1 + b*x2 + c*x3 + d*x4 + e
+mod3 <- lm(y_data~x1_data+x2_data+x3_data+x4_data, data=name_of_the_dataframe)
+
+# displays the output of the model computed by the previous line
+summary(mod3)
+
+# Prediction: predict() expects a single data frame (newdata) whose columns have
+# the same names as the predictors in the formula (x1_data, ..., x4_data)
+predicted <- predict(mod3, newdata = test_dataframe)
diff --git a/Desktop/open-source/R/searches/binary_search.r b/Desktop/open-source/R/searches/binary_search.r
new file mode 100644
index 00000000..60217af8
--- /dev/null
+++ b/Desktop/open-source/R/searches/binary_search.r
@@ -0,0 +1,28 @@
+binary_search <- function(arr, target) { #function for finding position of value
+ low <- 1
+ high <- length(arr)
+
+ while (low <= high) {
+ mid <- low + (high - low) %/% 2 #finding mid of array
+
+    if (arr[mid] == target) { # comparing the mid value with the value to search
+ return(mid) # Target found, return its index
+ } else if (arr[mid] < target) {
+ low <- mid + 1 # Search in the right half
+ } else {
+ high <- mid - 1 # Search in the left half
+ }
+ }
+ return(-1) # Target not found in the array
+}
+
+arr <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) #input array (hard code)
+target <- 7 #input value to be searched (hard code)
+
+result <- binary_search(arr, target) # call the binary_search function
+
+if (result == -1) {
+ cat("Element", target, "not found in the array.\n")
+} else {
+ cat("Element", target, "found at position", result, ".\n")
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/searches/jump_search.r b/Desktop/open-source/R/searches/jump_search.r
new file mode 100644
index 00000000..1c687304
--- /dev/null
+++ b/Desktop/open-source/R/searches/jump_search.r
@@ -0,0 +1,287 @@
+# Jump Search Algorithm Implementation in R
+# An efficient search algorithm for sorted arrays
+# Works by jumping ahead by fixed steps then performing linear search
+# Time complexity: O(√n) where n is the array length
+# Space complexity: O(1)
+
+library(R6)
+
+#' JumpSearch Class
+#' @description R6 class implementing the Jump Search algorithm
+#' @details Jump Search is a searching algorithm for sorted arrays that works by:
+#' 1. Dividing the array into blocks of size √n
+#' 2. Jumping ahead by √n steps until finding a block where target might be
+#' 3. Performing linear search within that block
+#' Advantages:
+#' - Better than linear search: O(√n) vs O(n)
+#' - Better for systems with slow backward iteration compared to binary search
+#' - Simple implementation
+#' Limitations:
+#' - Requires sorted array
+#' - Slower than binary search: O(√n) vs O(log n)
+JumpSearch <- R6Class(
+ "JumpSearch",
+
+ public = list(
+ #' @description Initialize Jump Search
+ #' @param data Sorted array to search in
+ #' @param validate_sorted Whether to validate if array is sorted
+ initialize = function(data = NULL, validate_sorted = TRUE) {
+ if (!is.null(data)) {
+ private$validate_input(data, validate_sorted)
+ self$data <- data
+ private$n <- length(data)
+ private$optimal_jump <- floor(sqrt(private$n))
+ }
+ invisible(self)
+ },
+
+ #' @description Search for a target value in the array
+ #' @param target Value to search for
+ #' @param jump_size Optional custom jump size (default: √n)
+ #' @return List containing index, number of comparisons, and success status
+ search = function(target, jump_size = NULL) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+      # Use the custom jump size if given, otherwise the optimal size (sqrt(n))
+      jump <- if (is.null(jump_size)) private$optimal_jump else jump_size
+
+      if (jump <= 0 || jump != round(jump)) {
+        stop("Jump size must be a positive integer")
+      }
+
+      prev <- 0
+      step <- jump
+      comparisons <- 0
+
+      # Jump through the array to find the block where the target might be
+      while (step <= private$n && self$data[step] < target) {
+        comparisons <- comparisons + 1
+        prev <- step
+        step <- step + jump
+      }
+
+      # If the last jump landed on the final element and it was still smaller
+      # than the target, the target cannot be in the array
+ if (prev >= private$n) {
+ return(list(
+ index = -1,
+ comparisons = comparisons,
+ found = FALSE
+ ))
+ }
+
+ # Linear search in the identified block
+ while ((prev + 1) <= private$n && self$data[prev + 1] <= target) {
+ comparisons <- comparisons + 1
+ prev <- prev + 1
+
+ if (self$data[prev] == target) {
+ return(list(
+ index = prev,
+ comparisons = comparisons,
+ found = TRUE
+ ))
+ }
+ }
+
+ # Target not found
+ return(list(
+ index = -1,
+ comparisons = comparisons,
+ found = FALSE
+ ))
+ },
+
+ #' @description Search for multiple targets
+ #' @param targets Vector of values to search for
+ #' @return List of search results for each target
+ search_multiple = function(targets) {
+ if (!is.numeric(targets)) {
+ stop("Targets must be numeric")
+ }
+
+ results <- list()
+ for (i in seq_along(targets)) {
+ results[[i]] <- self$search(targets[i])
+ results[[i]]$target <- targets[i]
+ }
+ return(results)
+ },
+
+ #' @description Find the optimal jump size for the current data
+ #' @return Optimal jump size (√n)
+ get_optimal_jump_size = function() {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+ return(private$optimal_jump)
+ },
+
+ #' @description Compare performance with different jump sizes
+ #' @param target Value to search for
+ #' @param jump_sizes Vector of jump sizes to test
+ #' @return Data frame with performance comparison
+ compare_jump_sizes = function(target, jump_sizes = NULL) {
+ if (is.null(self$data)) {
+ stop("No data available. Please initialize with data first.")
+ }
+
+ if (is.null(jump_sizes)) {
+ jump_sizes <- c(
+ floor(sqrt(private$n) / 2),
+ private$optimal_jump,
+ floor(sqrt(private$n) * 2)
+ )
+ }
+
+ results <- data.frame(
+ jump_size = numeric(),
+ comparisons = numeric(),
+ found = logical(),
+ stringsAsFactors = FALSE
+ )
+
+ for (size in jump_sizes) {
+ if (size > 0 && size <= private$n) {
+ result <- self$search(target, jump_size = size)
+ results <- rbind(results, data.frame(
+ jump_size = size,
+ comparisons = result$comparisons,
+ found = result$found
+ ))
+ }
+ }
+
+ return(results)
+ },
+
+ #' @description Update the data array
+ #' @param new_data New sorted array
+ #' @param validate_sorted Whether to validate if array is sorted
+ update_data = function(new_data, validate_sorted = TRUE) {
+ private$validate_input(new_data, validate_sorted)
+ self$data <- new_data
+ private$n <- length(new_data)
+ private$optimal_jump <- floor(sqrt(private$n))
+ invisible(self)
+ },
+
+ # Public fields
+ data = NULL
+ ),
+
+ private = list(
+ n = NULL,
+ optimal_jump = NULL,
+
+ validate_input = function(data, check_sorted) {
+ if (!is.numeric(data)) {
+ stop("Input data must be numeric")
+ }
+ if (any(is.na(data))) {
+ stop("Input data contains missing values")
+ }
+ if (length(data) == 0) {
+ stop("Input data cannot be empty")
+ }
+ if (check_sorted && !private$is_sorted(data)) {
+ stop("Input data must be sorted in ascending order")
+ }
+ },
+
+ is_sorted = function(data) {
+ all(diff(data) >= 0)
+ }
+ )
+)
+
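+# A minimal stand-alone sketch of the same idea (plain function, no R6 wrapper),
+# assuming `arr` is a sorted numeric vector:
+#
+# jump_search <- function(arr, target) {
+#   n <- length(arr)
+#   jump <- max(1, floor(sqrt(n)))
+#   prev <- 0
+#   step <- jump
+#   while (step <= n && arr[step] < target) {
+#     prev <- step
+#     step <- step + jump
+#   }
+#   upper <- min(step, n)
+#   if (prev < upper) {
+#     for (i in (prev + 1):upper) {
+#       if (arr[i] == target) return(i)
+#     }
+#   }
+#   -1
+# }
+# jump_search(c(1, 3, 5, 7, 9, 11), 9)  # returns 5
+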
+# Demonstration
+demonstrate_jump_search <- function() {
+ cat("=== Jump Search Algorithm Demo ===\n\n")
+
+ # Example 1: Basic usage
+ cat("Example 1: Basic jump search\n")
+ data <- c(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29)
+ cat("Sorted array:", paste(data, collapse = ", "), "\n")
+
+ js <- JumpSearch$new(data)
+ cat(sprintf("Array size: %d, Optimal jump size: %d\n\n",
+ length(data), js$get_optimal_jump_size()))
+
+ targets <- c(7, 19, 30)
+ for (target in targets) {
+ result <- js$search(target)
+ if (result$found) {
+ cat(sprintf("Target %d found at index %d (comparisons: %d)\n",
+ target, result$index, result$comparisons))
+ } else {
+ cat(sprintf("Target %d not found (comparisons: %d)\n",
+ target, result$comparisons))
+ }
+ }
+
+ # Example 2: Larger dataset
+ cat("\nExample 2: Larger dataset\n")
+ set.seed(42)
+ large_data <- sort(sample(1:1000, 100))
+ js2 <- JumpSearch$new(large_data)
+
+ cat(sprintf("Array size: %d, Optimal jump size: %d\n",
+ length(large_data), js2$get_optimal_jump_size()))
+
+ search_targets <- c(large_data[25], large_data[75], 999)
+ results <- js2$search_multiple(search_targets)
+
+ cat("\nMultiple search results:\n")
+ for (i in seq_along(results)) {
+ res <- results[[i]]
+ cat(sprintf("Target %d: %s (index: %d, comparisons: %d)\n",
+ res$target,
+ ifelse(res$found, "Found", "Not found"),
+ res$index,
+ res$comparisons))
+ }
+
+ # Example 3: Jump size comparison
+ cat("\nExample 3: Comparing different jump sizes\n")
+ test_data <- 1:100
+ js3 <- JumpSearch$new(test_data)
+ target <- 87
+
+ comparison <- js3$compare_jump_sizes(target)
+ cat(sprintf("Searching for %d in array of size %d:\n\n", target, length(test_data)))
+ print(comparison)
+
+ # Example 4: Performance analysis
+ cat("\nExample 4: Performance analysis\n")
+ sizes <- c(100, 1000, 10000)
+
+ cat("Average comparisons for different array sizes:\n")
+ for (n in sizes) {
+ test_array <- 1:n
+ js_test <- JumpSearch$new(test_array)
+
+ # Test multiple searches
+ num_tests <- 20
+ total_comps <- 0
+ for (i in 1:num_tests) {
+ target <- sample(test_array, 1)
+ result <- js_test$search(target)
+ total_comps <- total_comps + result$comparisons
+ }
+
+ avg_comps <- total_comps / num_tests
+ theoretical_bound <- sqrt(n)
+
+ cat(sprintf("n = %5d: Avg comparisons = %.1f, Theoretical O(√n) = %.1f\n",
+ n, avg_comps, theoretical_bound))
+ }
+
+ cat("\n=== Demo Complete ===\n")
+}
+
+# Run demonstration if not in interactive mode
+if (!interactive()) {
+ demonstrate_jump_search()
+}
\ No newline at end of file
diff --git a/Desktop/open-source/R/searches/linear_search.r b/Desktop/open-source/R/searches/linear_search.r
new file mode 100644
index 00000000..735107d6
--- /dev/null
+++ b/Desktop/open-source/R/searches/linear_search.r
@@ -0,0 +1,19 @@
+linear_search <- function(vector, search_value) { # returns the 1-based position of search_value in vector, or -1 if absent
+ for(i in 1:length(vector)){
+ if(vector[i]==search_value){ #comparing each value of array with the value to be searched
+ return (i)
+ }
+ }
+ return (-1)
+}
+
+user_vec<- c(10,20,30,40,50,60) #input array (hard code)
+user_val<-30 #input value to be searched (hard code)
+
+result <- linear_search(user_vec, user_val) # call the linear_search function
+
+if (result != -1) {
+  cat("Searched value", user_val, "found at position", result, "\n") # 1-based position in the vector
+} else {
+  cat("Searched value does not exist in the array\n")
+}
diff --git a/Desktop/open-source/R/searches/rabin.karp.string.search.r b/Desktop/open-source/R/searches/rabin.karp.string.search.r
new file mode 100644
index 00000000..86fdcc62
--- /dev/null
+++ b/Desktop/open-source/R/searches/rabin.karp.string.search.r
@@ -0,0 +1,76 @@
+# Rabin–Karp String Search Algorithm in R
+# Author: sgindeed
+# Description: Finds all occurrences of a pattern in a given text using a rolling hash technique.
+
+# Ask user for input
+text <- readline(prompt = "Enter the text: ")
+pattern <- readline(prompt = "Enter the pattern to search: ")
+
+# Convert both to lowercase for case-insensitive matching
+text <- tolower(text)
+pattern <- tolower(pattern)
+
+# Get lengths
+n <- nchar(text)
+m <- nchar(pattern)
+
+# Handle empty or invalid inputs
+if (m == 0) {
+ cat("Empty pattern. Nothing to search.\n")
+ quit(save = "no")
+}
+
+if (m > n) {
+ cat("Pattern is longer than text. Pattern not found in text.\n")
+ quit(save = "no")
+}
+
+# Constants
+base <- 256 # Number of possible characters
+mod <- 101 # A prime number for hashing
+
+# Initialize variables
+p_hash <- 0 # hash for pattern
+t_hash <- 0 # hash for text window
+h <- 1 # base^(m-1)
+matches <- c()
+
+# Compute (base^(m-1)) % mod safely
+for (i in seq_len(m - 1)) {
+ h <- (h * base) %% mod
+}
+
+# Convert characters to ASCII values
+pattern_chars <- utf8ToInt(pattern)
+text_chars <- utf8ToInt(text)
+
+# Compute initial hash values for pattern and first window of text
+for (i in 1:m) {
+ p_hash <- (base * p_hash + pattern_chars[i]) %% mod
+ t_hash <- (base * t_hash + text_chars[i]) %% mod
+}
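+
+# The hash of an m-character window is
+#   H = (c_1 * base^(m-1) + c_2 * base^(m-2) + ... + c_m) %% mod
+# so sliding the window one position to the right takes O(1) work:
+#   H_new = (base * (H_old - c_out * h) + c_in) %% mod,  with h = base^(m-1) %% mod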
+
+# Rabin–Karp main search
+for (i in 0:(n - m)) {
+ # If hash matches, verify actual substring
+ if (p_hash == t_hash) {
+ if (substr(text, i + 1, i + m) == pattern) {
+ matches <- c(matches, i + 1)
+ }
+ }
+
+ # Slide window: remove first char, add next char
+ if (i < n - m) {
+ t_hash <- (base * (t_hash - text_chars[i + 1] * h) + text_chars[i + m + 1]) %% mod
+ if (t_hash < 0) {
+ t_hash <- t_hash + mod
+ }
+ }
+}
+
+# Display results
+if (length(matches) > 0) {
+ cat("Pattern found at positions:", paste(matches, collapse = ", "), "\n")
+} else {
+ cat("Pattern not found in the given text.\n")
+}
diff --git a/Desktop/open-source/R/searches/ternary.search.r b/Desktop/open-source/R/searches/ternary.search.r
new file mode 100644
index 00000000..0c2e16a0
--- /dev/null
+++ b/Desktop/open-source/R/searches/ternary.search.r
@@ -0,0 +1,53 @@
+#' Performs ternary search on a sorted integer array.
+#'
+#' @param arr Integer vector. Must be sorted in ascending order.
+#' @param target Integer value to search for in arr.
+#' @return The index (1-based) of the target in arr if found, otherwise -1.
+#' @details The input array must be sorted in ascending order for correct results.
+# Ternary Search in R – works on sorted arrays
+
+ternary_search <- function(arr, target) {
+ l <- 1
+ r <- length(arr)
+ while (l <= r) {
+ mid1 <- l + (r - l) %/% 3
+ mid2 <- r - (r - l) %/% 3
+
+ if (arr[mid1] == target) return(mid1)
+ if (arr[mid2] == target) return(mid2)
+
+ if (target < arr[mid1]) {
+ r <- mid1 - 1
+ } else if (target > arr[mid2]) {
+ l <- mid2 + 1
+ } else {
+ l <- mid1 + 1
+ r <- mid2 - 1
+ }
+ }
+ return(-1)
+}
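+
+# mid1 and mid2 split the current range into three parts, so every iteration
+# discards at least one third of the remaining elements: O(log n) comparisons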
+
+# --- User Input Section with Validation ---
+arr_input <- readline("Enter sorted integers: ")
+arr_split <- strsplit(arr_input, " ")[[1]]
+arr <- as.integer(arr_split)
+if (length(arr) == 0 || any(is.na(arr))) {
+ cat("Error: Please enter a non-empty list of valid integers separated by spaces.\n")
+ quit(status = 1)
+}
+
+target_input <- readline("Enter target to search: ")
+target <- suppressWarnings(as.integer(target_input))
+if (is.na(target_input) || is.na(target)) {
+ cat("Error: Please enter a valid integer for the target.\n")
+ quit(status = 1)
+}
+
+# --- Execute Search ---
+index <- ternary_search(arr, target)
+
+if (index != -1)
+ cat("Element found at position:", index, "\n")
+else
+ cat("Element not found.\n")
diff --git a/Desktop/open-source/R/searches/z.algorithm.search.r b/Desktop/open-source/R/searches/z.algorithm.search.r
new file mode 100644
index 00000000..9e3b51f2
--- /dev/null
+++ b/Desktop/open-source/R/searches/z.algorithm.search.r
@@ -0,0 +1,37 @@
+# Z-Algorithm for Pattern Searching in R
+# Finds all occurrences of a pattern in a text efficiently in O(n + m).
+
+z_algorithm_search <- function(text, pattern) {
+ concat <- paste0(pattern, "$", text)
+ n <- nchar(concat)
+ Z <- integer(n)
+
+ L <- 0
+ R <- 0
+ for (i in 2:n) {
+ if (i <= R)
+ Z[i] <- min(R - i + 1, Z[i - L + 1])
+
+ while (i + Z[i] <= n && substr(concat, Z[i] + 1, Z[i] + 1) ==
+ substr(concat, i + Z[i], i + Z[i]))
+ Z[i] <- Z[i] + 1
+
+ if (i + Z[i] - 1 > R) {
+ L <- i
+ R <- i + Z[i] - 1
+ }
+ }
+
+  positions <- which(Z >= nchar(pattern))
+ matches <- positions - nchar(pattern) - 1
+ return(matches)
+}
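+
+# Z[i] is the length of the longest substring of `concat` starting at i that
+# matches a prefix of `concat`; a value of nchar(pattern) within the text part
+# therefore marks a full occurrence of the pattern.
+# Example: z_algorithm_search("ababcab", "ab") returns c(1, 3, 6)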
+
+text <- tolower(readline("Enter text: "))
+pattern <- tolower(readline("Enter pattern: "))
+
+matches <- z_algorithm_search(text, pattern)
+if (length(matches) > 0)
+ cat("Pattern found at positions:", matches, "\n")
+else
+ cat("Pattern not found.\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/sorting_algorithms/binary_insertion_sort.r b/Desktop/open-source/R/sorting_algorithms/binary_insertion_sort.r
new file mode 100644
index 00000000..4497c777
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/binary_insertion_sort.r
@@ -0,0 +1,43 @@
+# Binary Insertion Sort Function
+# Sorts an input vector using the Binary Insertion Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+binary_insertion_sort <- function(arr) {
+ # Loop through the input vector starting from the second element.
+ for (i in 2:length(arr)) {
+ # Store the current element in a variable.
+ key <- arr[i]
+ # Initialize left and right pointers for binary search.
+ left <- 1
+ right <- i - 1
+
+ # Binary search to find the correct position to insert the key.
+ while (left <= right) {
+ mid <- left + (right - left) %/% 2
+
+ if (key < arr[mid]) {
+ right <- mid - 1
+ } else {
+ left <- mid + 1
+ }
+ }
+
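+    # After the binary search, `left` is the insertion position:
+    # every element in arr[1:(left - 1)] is <= key
+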
+    # Shift elements to the right to make space for the key
+    # (only needed when the key is not already in its final position).
+    if (i > left) {
+      for (j in i:(left + 1)) {
+        arr[j] <- arr[j - 1]
+      }
+    }
+
+ # Insert the key into its correct position.
+ arr[left] <- key
+ }
+
+ # Return the sorted vector.
+ return(arr)
+}
+
+# Example usage:
+elements_vec <- c(64, 34, 25, 12, 22, 11, 90)
+sorted_vec <- binary_insertion_sort(elements_vec)
+print(sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/bubble_sort.r b/Desktop/open-source/R/sorting_algorithms/bubble_sort.r
new file mode 100644
index 00000000..7f064c05
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/bubble_sort.r
@@ -0,0 +1,19 @@
+# Bubble sort in R:
+
+bubble.sort <- function(elements.vec) {
+  n <- length(elements.vec)
+  if (n < 2) {
+    return(elements.vec) # nothing to sort
+  }
+  for (i in 1:(n - 1)) {
+ for(j in 1:(n-i)) {
+ if(elements.vec[j+1] < elements.vec[j]) {
+ temp <- elements.vec[j]
+ elements.vec[j] <- elements.vec[j+1]
+ elements.vec[j+1] <- temp
+ }
+ }
+ }
+ return(elements.vec)
+}
+
+# Example:
+# bubble.sort(c(5, 2, 3, 1, 4))
+# [1] 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/bucket_sort.r b/Desktop/open-source/R/sorting_algorithms/bucket_sort.r
new file mode 100644
index 00000000..4596725b
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/bucket_sort.r
@@ -0,0 +1,43 @@
+# Bucket Sort Function
+# Sorts an input vector using the Bucket Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+bucket_sort <- function(arr) {
+ if (length(arr) == 0) {
+ return(arr)
+ }
+
+ # Find the maximum and minimum values in the input vector
+ max_val <- max(arr)
+ min_val <- min(arr)
+
+ # Create an array of buckets
+ num_buckets <- max_val - min_val + 1
+ buckets <- vector("list", length = num_buckets)
+
+ # Initialize the buckets
+ for (i in 1:num_buckets) {
+ buckets[[i]] <- numeric(0)
+ }
+
+ # Place elements into buckets
+ for (val in arr) {
+ bucket_index <- val - min_val + 1
+ buckets[[bucket_index]] <- c(buckets[[bucket_index]], val)
+ }
+
+  # Sort each non-empty bucket; bubble.sort comes from bubble_sort.r and must be
+  # sourced first (empty buckets are left as-is to avoid indexing errors)
+  sorted_buckets <- lapply(buckets, function(b) if (length(b) > 1) bubble.sort(b) else b)
+
+ # Concatenate the sorted buckets to obtain the final sorted array
+ sorted_arr <- unlist(sorted_buckets)
+
+ return(sorted_arr)
+}
+
+# Example usage:
+elements_vec <- c(3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5)
+bucket_sorted_vec <- bucket_sort(elements_vec)
+print(bucket_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/cocktail_sort.r b/Desktop/open-source/R/sorting_algorithms/cocktail_sort.r
new file mode 100644
index 00000000..694f97af
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/cocktail_sort.r
@@ -0,0 +1,56 @@
+cocktailSort <- function(arr) {
+ n <- length(arr)
+ swapped <- TRUE
+ beg <- 1
+ end <- n - 1
+
+ while (swapped) {
+ swapped <- FALSE
+
+ # Forward pass (left to right)
+ for (i in seq(beg, end)) {
+ if (arr[i] > arr[i + 1]) {
+ # Swap arr[i] and arr[i + 1]
+ temp <- arr[i]
+ arr[i] <- arr[i + 1]
+ arr[i + 1] <- temp
+ swapped <- TRUE
+ }
+ }
+
+ # If no swaps occurred in the forward pass, the array is sorted
+ if (!swapped) {
+ break
+ }
+
+ swapped <- FALSE
+ end <- end - 1
+
+ # Backward pass (right to left)
+ for (i in seq(end, beg, by = -1)) {
+ if (arr[i] > arr[i + 1]) {
+ # Swap arr[i] and arr[i + 1]
+ temp <- arr[i]
+ arr[i] <- arr[i + 1]
+ arr[i + 1] <- temp
+ swapped <- TRUE
+ }
+ }
+
+ beg <- beg + 1
+ }
+
+ return(arr)
+}
+
+# Example Usage
+unsorted_array <- c(38, 27, 43, 3, 9, 82, 10)
+cat("Unsorted Array: ", unsorted_array, "\n")
+
+# Call the Cocktail Sort function to sort the array
+sorted_array <- cocktailSort(unsorted_array)
+
+cat("Sorted Array: ", sorted_array, "\n")
+
+# Example: The 'unsorted_array' is sorted using Cocktail Sort
+
diff --git a/Desktop/open-source/R/sorting_algorithms/comb_sort.r b/Desktop/open-source/R/sorting_algorithms/comb_sort.r
new file mode 100644
index 00000000..ee4e8af4
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/comb_sort.r
@@ -0,0 +1,26 @@
+# Comb sort in R:
+
+comb.sort <- function(elements.vec) {
+ gap <- length(elements.vec)
+ swaps <- 1
+  while (gap > 1 || swaps == 1) {
+ gap = floor(gap / 1.3)
+ if (gap < 1) {
+ gap = 1
+ }
+ swaps = 0
+ i = 1
+    while (i + gap <= length(elements.vec)) {
+ if (elements.vec[i] > elements.vec[i + gap]) {
+ elements.vec[c(i, i + gap)] <- elements.vec[c(i + gap, i)]
+ swaps = 1
+ }
+ i <- i + 1
+ }
+ }
+ return(elements.vec)
+}
+
+# Example:
+# comb.sort(sample(1:100,10))
+# [1] 9 49 50 51 56 60 61 71 86 95
diff --git a/Desktop/open-source/R/sorting_algorithms/counting_sort.r b/Desktop/open-source/R/sorting_algorithms/counting_sort.r
new file mode 100644
index 00000000..2cc2028f
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/counting_sort.r
@@ -0,0 +1,26 @@
+# Counting sort in R:
+
+counting.sort <- function(elements.vec){
+ min <- min(elements.vec)
+ max <- max(elements.vec)
+ count <- rep(0,(max - min + 1))
+ for(i in 1:length(elements.vec)){
+ x <- 1 - min + elements.vec[i]
+ count[x] <- count[x] + 1
+ }
+ for(i in 2:length(count)){
+ count[i] <- count[i] + count[i-1]
+ }
+ result <- rep(0,length(elements.vec))
+ for(i in 1:length(elements.vec)){
+ x <- 1 - min + elements.vec[i]
+ index <- count[x]
+ result[index] <- elements.vec[i]
+ count[x] = count[x] - 1
+ }
+ return(result)
+}
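+
+# Counting sort runs in O(n + k) time, where k = max - min + 1, so it is only
+# practical when the range of values is not much larger than the input length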
+
+# Example:
+# counting.sort(c(5, 2, 3, 1, 4))
+# [1] 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/cycle_sort.r b/Desktop/open-source/R/sorting_algorithms/cycle_sort.r
new file mode 100644
index 00000000..536bfabc
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/cycle_sort.r
@@ -0,0 +1,59 @@
+# Cycle Sort Function
+# Sorts an input vector in-place using the Cycle Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+cycle_sort <- function(arr) {
+ n <- length(arr)
+ for (cycle_start in 1:(n - 1)) {
+ item <- arr[cycle_start]
+ pos <- cycle_start
+
+ # Find the correct position for the current item
+ for (i in (cycle_start + 1):n) {
+ if (arr[i] < item) {
+ pos <- pos + 1
+ }
+ }
+
+ # Skip if the item is already in the correct position
+ if (pos == cycle_start) {
+ next
+ }
+
+ # Move the item to its correct position
+ while (item == arr[pos]) {
+ pos <- pos + 1
+ }
+ temp <- arr[pos]
+ arr[pos] <- item
+ item <- temp
+
+ # Rotate the remaining cycle
+ while (pos != cycle_start) {
+ pos <- cycle_start
+ for (i in (cycle_start + 1):n) {
+ if (arr[i] < item) {
+ pos <- pos + 1
+ }
+ }
+
+ # Skip if the item is already in the correct position
+ while (item == arr[pos]) {
+ pos <- pos + 1
+ }
+
+ # Move the item to its correct position
+ temp <- arr[pos]
+ arr[pos] <- item
+ item <- temp
+ }
+ }
+ return(arr)
+}
+
+# Example usage:
+elements_vec <- c(3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5)
+cycle_sorted_vec <- cycle_sort(elements_vec)
+print(cycle_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/gnome_sort.r b/Desktop/open-source/R/sorting_algorithms/gnome_sort.r
new file mode 100644
index 00000000..c4ded37a
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/gnome_sort.r
@@ -0,0 +1,30 @@
+# Gnome Sort Function
+# Sorts an input vector using the Gnome Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+
+gnome_sort <- function(arr) {
+ index <- 1
+ n <- length(arr)
+
+ while (index <= n) {
+ if (index == 1 || arr[index] >= arr[index - 1]) {
+ index <- index + 1
+ } else {
+ # Swap arr[index] and arr[index - 1]
+ temp <- arr[index]
+ arr[index] <- arr[index - 1]
+ arr[index - 1] <- temp
+ index <- index - 1
+ }
+ }
+
+ return(arr)
+}
+
+# Example usage:
+elements_vec <- c(34, 2, 10, -9)
+gnome_sorted_vec <- gnome_sort(elements_vec)
+print(gnome_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/heap_sort.r b/Desktop/open-source/R/sorting_algorithms/heap_sort.r
new file mode 100644
index 00000000..c2378798
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/heap_sort.r
@@ -0,0 +1,63 @@
+# Heap sort in R:
+
+build.heap <- function(elements.vec) {
+ l = length(elements.vec)
+ heap = elements.vec
+ for (i in l:1) {
+ heap = modify.heap(heap, i)
+ }
+ return(heap)
+}
+
+# Check whether the vector satisfies the (min-)heap property from rootIndex onward
+is.heap <- function(heap, rootIndex) {
+  i = rootIndex
+  res = TRUE
+  while (2 * i <= length(heap) && res) {
+    child = c(heap[2 * i], heap[2 * i + 1])
+    child = child[!is.na(child)]
+    res = all(heap[i] <= child)
+    i = i + 1
+  }
+  return(res)
+}
+
+modify.heap <- function(heap, rootIndex) {
+ l = length(heap)
+ flag = 1
+ while (rootIndex * 2 <= l && flag == 1) {
+ leftIndex = rootIndex * 2
+ rightIndex = rootIndex * 2 + 1
+ flag = 0
+ child = c(heap[leftIndex], heap[rightIndex])
+ child = child[!is.na(child)]
+ minIndex = which.min(child)
+ if (heap[rootIndex] > child[minIndex]) {
+ flag = 1
+ heapIndex = c(leftIndex, rightIndex)[minIndex]
+ temp = heap[heapIndex]
+ heap[heapIndex] = heap[rootIndex]
+ heap[rootIndex] = temp
+ rootIndex = heapIndex
+ }
+ }
+ return(heap)
+}
+
+heap.sort <- function(heap) {
+ sorted.elements = NULL
+ l = length(heap)
+ while(l > 0)
+ {
+ sorted.elements = c(sorted.elements, heap[1])
+ l = length(heap)
+ heap[1] = heap[l]
+ heap = heap[1:(l - 1)]
+ heap = modify.heap(heap, rootIndex = 1)
+ l = l - 1
+ }
+ return(sorted.elements)
+}
+
+# Example:
+# heap.sort(build.heap(c(5, 2, 3, 1, 4)))
+# [1] 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/insertion_sort.r b/Desktop/open-source/R/sorting_algorithms/insertion_sort.r
new file mode 100644
index 00000000..4830e69a
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/insertion_sort.r
@@ -0,0 +1,18 @@
+# Insertion sort in R:
+
+insertion.sort <- function(elements.vec) {
+ for (j in 2:length(elements.vec)) {
+ key = elements.vec[j]
+ i = j - 1
+ while (i > 0 && elements.vec[i] > key) {
+ elements.vec[(i + 1)] = elements.vec[i]
+ i = i - 1
+ }
+ elements.vec[(i + 1)] = key
+ }
+ return(elements.vec)
+}
+
+# Example:
+# insertion.sort(c(5, 2, 3, 1, 4))
+# [1] 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/merge_sort.r b/Desktop/open-source/R/sorting_algorithms/merge_sort.r
new file mode 100644
index 00000000..731a5da0
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/merge_sort.r
@@ -0,0 +1,32 @@
+# Merge sort in R:
+
+merge.func <-function(leftArray, rightArray) {
+ l <- numeric(length(leftArray) + length(rightArray))
+ leftIndex <- 1; rightIndex <- 1; i <- 1;
+ for(i in 1:length(l)) {
+ if((leftIndex <= length(leftArray) && leftArray[leftIndex] < rightArray[rightIndex]) || rightIndex > length(rightArray)) {
+ l[i] <- leftArray[leftIndex]
+ leftIndex <- leftIndex + 1
+ } else {
+ l[i] <- rightArray[rightIndex]
+ rightIndex <- rightIndex + 1
+ }
+ }
+ return(l)
+}
+
+merge.sort <- function(elements.vec) {
+ if(length(elements.vec) > 1) {
+ m <- ceiling(length(elements.vec) / 2)
+ leftArray <- merge.sort(elements.vec[1:m])
+ rightArray <- merge.sort(elements.vec[(m + 1):length(elements.vec)])
+ merge.func(leftArray, rightArray)
+ }
+ else {
+ return(elements.vec)
+ }
+}
+
+# Example:
+# merge.sort(c(5, 2, 3, 1, 4))
+# [1] 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/odd_even_sort.r b/Desktop/open-source/R/sorting_algorithms/odd_even_sort.r
new file mode 100644
index 00000000..61a1e00a
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/odd_even_sort.r
@@ -0,0 +1,39 @@
+# Odd-Even Sort Function
+# Sorts an input vector in-place using the Odd-Even Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+odd_even_sort <- function(arr) {
+ n <- length(arr)
+ sorted <- FALSE
+ while (!sorted) {
+ sorted <- TRUE
+
+ # Odd-Even Sort (Phase 1 - Odd)
+ for (i in seq(1, n - 1, by = 2)) {
+ if (arr[i] > arr[i + 1]) {
+ temp <- arr[i]
+ arr[i] <- arr[i + 1]
+ arr[i + 1] <- temp
+ sorted <- FALSE
+ }
+ }
+
+ # Odd-Even Sort (Phase 2 - Even)
+ for (i in seq(2, n - 1, by = 2)) {
+ if (arr[i] > arr[i + 1]) {
+ temp <- arr[i]
+ arr[i] <- arr[i + 1]
+ arr[i + 1] <- temp
+ sorted <- FALSE
+ }
+ }
+ }
+ return(arr)
+}
+
+# Example usage:
+elements_vec <- c(3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5)
+odd_even_sorted_vec <- odd_even_sort(elements_vec)
+print(odd_even_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/pancake_sort.r b/Desktop/open-source/R/sorting_algorithms/pancake_sort.r
new file mode 100644
index 00000000..2826f5fd
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/pancake_sort.r
@@ -0,0 +1,45 @@
+# Function to flip the first 'k' elements of an array
+flip <- function(arr, k) {
+ arr[1:k] <- rev(arr[1:k]) # Reverse the first 'k' elements
+ return(arr)
+}
+
+# Function to find the index of the maximum element in an array
+findMaxIndex <- function(arr, n) {
+ maxIndex <- 1
+ for (i in 2:n) {
+ if (arr[i] > arr[maxIndex]) {
+ maxIndex <- i
+ }
+ }
+ return(maxIndex)
+}
+
+# Function to perform Pancake Sort
+pancakeSort <- function(arr) {
+ n <- length(arr)
+
+ for (currentSize in n:2) {
+ # Find the index of the maximum element in the unsorted part of the array
+ maxIndex <- findMaxIndex(arr, currentSize)
+
+ # If the maximum element is not at the end of the unsorted part, flip it
+ if (maxIndex != currentSize) {
+ # Flip the maximum element to the beginning of the array
+ arr <- flip(arr, maxIndex)
+
+ # Flip the maximum element to its correct position
+ arr <- flip(arr, currentSize)
+ }
+ }
+
+ return(arr)
+}
+
+# Example usage:
+arr <- c(3, 1, 5, 2, 4)
+cat("Original Array:", arr, "\n")
+
+# Call the Pancake Sort function to sort the array
+sortedArr <- pancakeSort(arr)
+cat("Sorted Array:", sortedArr, "\n")
diff --git a/Desktop/open-source/R/sorting_algorithms/patience_sort.r b/Desktop/open-source/R/sorting_algorithms/patience_sort.r
new file mode 100644
index 00000000..992debfc
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/patience_sort.r
@@ -0,0 +1,56 @@
+# Patience Sort Function
+# Sorts an input vector using the Patience Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+
+patience_sort <- function(arr) {
+ if (length(arr) == 0) {
+ return(arr)
+ }
+
+ piles <- list()
+
+ # Build piles
+ for (x in arr) {
+ placed <- FALSE
+ for (i in seq_along(piles)) {
+ if (x < tail(piles[[i]], n=1)) {
+ piles[[i]] <- c(piles[[i]], x)
+ placed <- TRUE
+ break
+ }
+ }
+ if (!placed) {
+ piles[[length(piles) + 1]] <- c(x)
+ }
+ }
+
+ # Collect sorted elements
+ sorted_arr <- c()
+ while (length(piles) > 0) {
+ # Find the pile with the smallest top element
+ min_top <- Inf
+ min_index <- -1
+ for (i in seq_along(piles)) {
+ if (tail(piles[[i]], n=1) < min_top) {
+ min_top <- tail(piles[[i]], n=1)
+ min_index <- i
+ }
+ }
+ # Remove the smallest top element and add it to the sorted array
+ sorted_arr <- c(sorted_arr, min_top)
+ piles[[min_index]] <- head(piles[[min_index]], -1)
+ if (length(piles[[min_index]]) == 0) {
+ piles[[min_index]] <- NULL
+ }
+ }
+
+ return(sorted_arr)
+}
+
+# Example usage:
+elements_vec <- c(4, 3, 2, 1)
+patience_sorted_vec <- patience_sort(elements_vec)
+print(patience_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/quick_sort.r b/Desktop/open-source/R/sorting_algorithms/quick_sort.r
new file mode 100644
index 00000000..ac0ff113
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/quick_sort.r
@@ -0,0 +1,22 @@
+# Quick sort in R:
+
+quick.sort <- function(elements.vec) {
+ if(length(elements.vec) <= 1) {
+ return(elements.vec)
+ }
+ pivot <- elements.vec[1]
+ non.pivot <- elements.vec[-1]
+ pivot_less <- quick.sort(non.pivot[non.pivot < pivot])
+ pivot_greater <- quick.sort(non.pivot[non.pivot >= pivot])
+ return(c(pivot_less, pivot, pivot_greater))
+}
+
+# Example:
+# quick.sort(c(5, 2, 3, 1, 1, 4))
+# [1] 1 1 2 3 4 5
+
+# Notes:
+# 1. Quick sort is not a stable sorting algorithm.
+# 2. It is implemented in the 'sort' function of base R:
+# sort(c(5, 2, 3, 1, 1, 4), method = "quick" , index.return = FALSE)
+# [1] 1 1 2 3 4 5
diff --git a/Desktop/open-source/R/sorting_algorithms/radix_sort.r b/Desktop/open-source/R/sorting_algorithms/radix_sort.r
new file mode 100644
index 00000000..4d71467e
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/radix_sort.r
@@ -0,0 +1,17 @@
+# Radix sort in R:
+
+radix.sort <- function(elements.vec) {
+ x <- nchar(max(elements.vec))
+ for (i in 1:x)
+ elements.vec <- elements.vec[order(elements.vec %% (10 ^ i))]
+ return(elements.vec)
+}
+
+# Example:
+# radix.sort(c(50, 3200, 27, 976, 820))
+# [1] 27 50 820 976 3200
+
+# Note:
+# It is implemented in the 'sort' function of base R:
+# sort(c(50, 3200, 27, 976, 820), method = "radix" , index.return = FALSE)
+# [1] 27 50 820 976 3200
diff --git a/Desktop/open-source/R/sorting_algorithms/selection_sort.r b/Desktop/open-source/R/sorting_algorithms/selection_sort.r
new file mode 100644
index 00000000..87391037
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/selection_sort.r
@@ -0,0 +1,23 @@
+# Selection sort in R:
+
+selection.sort <- function(elements.vec, ascending = TRUE) {
+ max <- length(elements.vec)
+ for (j in 1:(max - 1)) {
+ m <- elements.vec[j]
+ p <- j
+ for(k in (j + 1):max) {
+ if(ascending && elements.vec[k] < m || !ascending && elements.vec[k] > m) {
+ m <- elements.vec[k]
+ p <- k
+ }
+ }
+ elements.vec[p] <- elements.vec[j]
+ elements.vec[j] <- m
+ }
+ return(elements.vec)
+}
+
+# Example:
+# selection.sort(c(5, 2, 3, 1, 1, 4))
+# [1] 1 1 2 3 4 5
+# Note that selection sort is not a stable sorting algorithm.
diff --git a/Desktop/open-source/R/sorting_algorithms/shell_sort.r b/Desktop/open-source/R/sorting_algorithms/shell_sort.r
new file mode 100644
index 00000000..9c39f777
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/shell_sort.r
@@ -0,0 +1,37 @@
+# Function to perform Shell Sort
+shellSort <- function(arr) {
+ n <- length(arr)
+
+ # Start with a large gap and reduce it
+ gap <- n %/% 2 # Initial gap
+
+ while (gap > 0) {
+ for (i in (gap + 1):n) {
+ # Store the current element to be compared
+ temp <- arr[i]
+
+ # Compare the current element with elements at positions 'i - gap', 'i - 2 * gap', ...
+ j <- i
+ while (j > gap && arr[j - gap] > temp) {
+ arr[j] <- arr[j - gap]
+ j <- j - gap
+ }
+
+ # Place the current element in its correct position
+ arr[j] <- temp
+ }
+
+ # Reduce the gap for the next iteration
+ gap <- gap %/% 2
+ }
+
+ return(arr)
+}
+
+# Example usage:
+arr <- c(12, 34, 54, 2, 3)
+cat("Original Array:", arr, "\n")
+
+# Call the Shell Sort function to sort the array
+sortedArr <- shellSort(arr)
+cat("Sorted Array:", sortedArr, "\n")
diff --git a/Desktop/open-source/R/sorting_algorithms/stooge_sort.r b/Desktop/open-source/R/sorting_algorithms/stooge_sort.r
new file mode 100644
index 00000000..f9bdceef
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/stooge_sort.r
@@ -0,0 +1,18 @@
+# Stooge sort in R:
+
+stooge.sort <- function(elements.vec) {
+ i = 1
+ j = length(elements.vec)
+ if (elements.vec[j] < elements.vec[i]) elements.vec[c(j, i)] = elements.vec[c(i, j)]
+ if (j - i > 1) {
+ t = (j - i + 1) %/% 3
+ elements.vec[i:(j - t)] = stooge.sort(elements.vec[i:(j - t)])
+ elements.vec[(i + t):j] = stooge.sort(elements.vec[(i + t):j])
+ elements.vec[i:(j - t)] = stooge.sort(elements.vec[i:(j - t)])
+ }
+ elements.vec
+}
+
+# Example:
+# stooge.sort(sample(21, 20))
+# [1] 1 2 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
diff --git a/Desktop/open-source/R/sorting_algorithms/strand_sort.r b/Desktop/open-source/R/sorting_algorithms/strand_sort.r
new file mode 100644
index 00000000..25272674
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/strand_sort.r
@@ -0,0 +1,63 @@
+# Strand Sort Function
+# Sorts an input vector using the Strand Sort algorithm.
+# Parameters:
+# - arr: Input vector to be sorted.
+# Returns:
+# - Sorted vector.
+
+strand_sort <- function(arr) {
+ if (length(arr) <= 1) {
+ return(arr)
+ }
+
+ output <- c()
+
+ while (length(arr) > 0) {
+ sublist <- c(arr[1])
+ arr <- arr[-1]
+ i <- 1
+ while (i <= length(arr)) {
+ if (arr[i] >= tail(sublist, n=1)) {
+ sublist <- c(sublist, arr[i])
+ arr <- arr[-i]
+ } else {
+ i <- i + 1
+ }
+ }
+ output <- merge_sorted_lists(output, sublist)
+ }
+
+ return(output)
+}
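+
+# For c(4, 2, 5, 3, 1) the strands pulled out are (4, 5), then (2, 3), then (1),
+# each merged into the sorted output as soon as it is extracted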
+
+# Helper function to merge two sorted lists
+merge_sorted_lists <- function(list1, list2) {
+ result <- c()
+ i <- 1
+ j <- 1
+
+ while (i <= length(list1) && j <= length(list2)) {
+ if (list1[i] <= list2[j]) {
+ result <- c(result, list1[i])
+ i <- i + 1
+ } else {
+ result <- c(result, list2[j])
+ j <- j + 1
+ }
+ }
+
+ if (i <= length(list1)) {
+ result <- c(result, list1[i:length(list1)])
+ }
+
+ if (j <= length(list2)) {
+ result <- c(result, list2[j:length(list2)])
+ }
+
+ return(result)
+}
+
+# Example usage:
+elements_vec <- c(4, 2, 5, 3, 1)
+strand_sorted_vec <- strand_sort(elements_vec)
+print(strand_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/tim_sort.r b/Desktop/open-source/R/sorting_algorithms/tim_sort.r
new file mode 100644
index 00000000..db4835d9
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/tim_sort.r
@@ -0,0 +1,90 @@
+# Insertion Sort: Sort small subarrays (runs)
+insertion_sort <- function(arr, left, right) {
+  if (right <= left) {
+    return(arr) # a run of length 0 or 1 is already sorted
+  }
+  for (i in (left + 1):right) {
+ key <- arr[i]
+ j <- i - 1
+ while (j >= left && arr[j] > key) {
+ arr[j + 1] <- arr[j]
+ j <- j - 1
+ }
+ arr[j + 1] <- key
+ }
+ return(arr)
+}
+
+# Merge two sorted subarrays
+merge <- function(arr, left, mid, right) {
+ n1 <- mid - left + 1
+ n2 <- right - mid
+
+ left_part <- arr[left:(mid)]
+ right_part <- arr[(mid + 1):right]
+
+ i <- 1
+ j <- 1
+ k <- left
+
+ # Merge left_part and right_part into arr
+ while (i <= n1 && j <= n2) {
+ if (left_part[i] <= right_part[j]) {
+ arr[k] <- left_part[i]
+ i <- i + 1
+ } else {
+ arr[k] <- right_part[j]
+ j <- j + 1
+ }
+ k <- k + 1
+ }
+
+ # Copy remaining elements of left_part, if any
+ while (i <= n1) {
+ arr[k] <- left_part[i]
+ i <- i + 1
+ k <- k + 1
+ }
+
+ # Copy remaining elements of right_part, if any
+ while (j <= n2) {
+ arr[k] <- right_part[j]
+ j <- j + 1
+ k <- k + 1
+ }
+
+ return(arr)
+}
+
+# TimSort function
+tim_sort <- function(arr) {
+ n <- length(arr)
+ min_run <- 32
+
+ # Sort individual subarrays of size min_run using insertion sort
+ for (start in seq(1, n, by = min_run)) {
+ end <- min(start + min_run - 1, n)
+ arr <- insertion_sort(arr, start, end)
+ }
+
+ # Merge sorted subarrays
+ size <- min_run
+ while (size < n) {
+ for (left in seq(1, n, by = 2 * size)) {
+ mid <- min(left + size - 1, n)
+ right <- min(left + 2 * size - 1, n)
+ if (mid < right) {
+ arr <- merge(arr, left, mid, right)
+ }
+ }
+ size <- 2 * size
+ }
+
+ return(arr)
+}
+
+# Example usage:
+# -------------------
+# Input: c(5, 21, 7, 23, 19, 11, 16, 13)
+# Expected Output: c(5, 7, 11, 13, 16, 19, 21, 23)
+
+elements_vec <- c(5, 21, 7, 23, 19, 11, 16, 13)
+tim_sorted_vec <- tim_sort(elements_vec)
+print(tim_sorted_vec)
diff --git a/Desktop/open-source/R/sorting_algorithms/topological_sort.r b/Desktop/open-source/R/sorting_algorithms/topological_sort.r
new file mode 100644
index 00000000..3db79a7e
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/topological_sort.r
@@ -0,0 +1,49 @@
+# Function to perform topological sort
+topological_sort <- function(graph) {
+ # Number of vertices in the graph
+ num_vertices <- length(graph)
+
+  # Initialize shared state for the DFS
+  visited <- rep(FALSE, num_vertices)
+  stack <- c()
+
+  # Helper function to perform DFS
+  # `visited` and `stack` are updated in the enclosing environment via `<<-`
+  # so that every recursive call shares the same state (passing them as
+  # arguments would only modify local copies and break the traversal)
+  dfs <- function(node) {
+    visited[node] <<- TRUE
+
+    # Visit all adjacent vertices
+    for (neighbor in graph[[node]]) {
+      if (!visited[neighbor]) {
+        dfs(neighbor)
+      }
+    }
+
+    # Push the current node onto the stack once all its descendants are done
+    stack <<- c(stack, node)
+  }
+
+  # Perform DFS for each unvisited vertex
+  for (node in 1:num_vertices) {
+    if (!visited[node]) {
+      dfs(node)
+    }
+  }
+
+ # Reverse the stack to get the topological order
+ topological_order <- rev(stack)
+ return(topological_order)
+}
+
+# Example usage
+# Define a sample DAG as an adjacency list
+# Here, we represent the graph as a list of vectors, where each vector contains the neighbors of the corresponding node.
+graph <- list(
+ c(2, 3), # Node 1 points to nodes 2 and 3
+ c(3, 4), # Node 2 points to nodes 3 and 4
+ c(5), # Node 3 points to node 5
+ c(5), # Node 4 points to node 5
+ numeric(0) # Node 5 has no outgoing edges
+)
+
+topological_order <- topological_sort(graph)
+cat("Topological Order:", topological_order, "\n")
diff --git a/Desktop/open-source/R/sorting_algorithms/wiggle_sort.r b/Desktop/open-source/R/sorting_algorithms/wiggle_sort.r
new file mode 100644
index 00000000..808dcc1e
--- /dev/null
+++ b/Desktop/open-source/R/sorting_algorithms/wiggle_sort.r
@@ -0,0 +1,24 @@
+# Wiggle Sort Function
+# Rearranges the elements in the input vector into a wiggle pattern.
+# Parameters:
+# - arr: Input vector to be rearranged.
+# Returns:
+# - Wiggle sorted vector.
+wiggle_sort <- function(arr) {
+  n <- length(arr)
+  # Vectors of length 0 or 1 need no rearranging (and 2:n would run backwards)
+  if (n < 2) {
+    return(arr)
+  }
+  for (i in 2:n) {
+    # Swap adjacent elements whenever the wiggle pattern
+    # arr[1] <= arr[2] >= arr[3] <= arr[4] ... is violated
+    if ((i %% 2 == 0 && arr[i] < arr[i - 1]) || (i %% 2 != 0 && arr[i] > arr[i - 1])) {
+      temp <- arr[i]
+ arr[i] <- arr[i - 1]
+ arr[i - 1] <- temp
+ }
+ }
+ return(arr)
+}
+
+# Example usage:
+elements_vec <- c(3, 5, 2, 1, 6, 4)
+wiggle_sorted_vec <- wiggle_sort(elements_vec)
+print(wiggle_sorted_vec)
diff --git a/Desktop/open-source/R/string_manipulation/burrows.r b/Desktop/open-source/R/string_manipulation/burrows.r
new file mode 100644
index 00000000..b94f45f0
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/burrows.r
@@ -0,0 +1,31 @@
+# Burrows-Wheeler Transform (BWT) in R
+# Computes the Burrows-Wheeler Transform of a string
+# Useful in compression and efficient substring searching
+
+burrows_wheeler_transform <- function(s) {
+ s <- paste0(s, "$") # Append unique end-of-string character
+ n <- nchar(s)
+
+ # Generate all rotations of the string
+ rotations <- character(n)
+ for (i in 1:n) {
+ rotations[i] <- paste0(substr(s, i, n), substr(s, 1, i - 1))
+ }
+
+ # Sort the rotations lexicographically
+ rotations_sorted <- sort(rotations)
+
+ # Build BWT by taking the last character of each sorted rotation
+ bwt <- paste0(sapply(rotations_sorted, function(x) substr(x, n, n)), collapse = "")
+
+ return(list(original = s, rotations_sorted = rotations_sorted, bwt = bwt))
+}
+
+# Interactive input
+s <- readline(prompt = "Enter a string: ")
+result <- burrows_wheeler_transform(s)
+
+cat("Original string with end marker: ", result$original, "\n")
+cat("Sorted rotations:\n")
+print(result$rotations_sorted)
+cat("Burrows-Wheeler Transform: ", result$bwt, "\n")
diff --git a/Desktop/open-source/R/string_manipulation/findpalindrome.r b/Desktop/open-source/R/string_manipulation/findpalindrome.r
new file mode 100644
index 00000000..e6572cac
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/findpalindrome.r
@@ -0,0 +1,93 @@
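+# findPalindrome: extracts the palindromic words (or digit groups) of more than
+# one character from a sentence or a number.
+# Dependency: the 'tm' package for tm::removePunctuation()
+# (install with install.packages("tm") if it is not already available).
+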
+findPalindrome <- function(input) {
+
+ is.palindrome <- function(input) {
+ if (is.numeric(input)) { # checks if input is numeric
+ # convert the numeric input value to character
+ input_con <- as.character(input)
+ # split the string into characters
+ input_split <- c(unlist(strsplit(input_con, split = "")))
+ # reverse the characters
+ input_rev <- rev(input_split)
+ # conditional statement to compare split string
+ # with the reversed string
+ if (all(input_split == input_rev) != TRUE) {
+ return(FALSE)
+ } else {
+ return(TRUE)
+ }
+ } else if (is.character(input)) { # checks if input is character
+ # split the string into characters
+ input_split <- c(unlist(strsplit(tolower(input), split = "")))
+ # reverse the characters
+ input_rev <- rev(input_split)
+ # conditional statement to compare split string
+ # with the reversed string
+ if (all(input_split == input_rev) != TRUE) {
+ return(FALSE)
+ } else {
+ return(TRUE)
+ }
+ }
+ }
+
+ if(is.character(input)) {
+ # clean out punctuation
+ input_clean <- tm::removePunctuation(input)
+
+ # split the sentence into individual words
+ input_split <- c(unlist(strsplit(input_clean, split = " ")))
+
+ # loop every word in the text through the is.palindrome() function
+ # and return their boolean values
+ check_palin <- c()
+ for (i in input_split) {
+ result <- is.palindrome(i)
+ check_palin <- append(check_palin, result)
+ }
+
+ # check and return index positions of TRUE
+ indx <- which(check_palin == TRUE)
+
+ # use the index positions to filter input_split
+ palindromes <- input_split[indx]
+
+    # keep only palindromic words longer than one character
+ palin <- palindromes[nchar(palindromes) > 1]
+
+ return(noquote(palin))
+
+ } else if(is.numeric(input)) {
+ # convert numeric input to character
+ num_con <- as.character(input)
+
+ # clean out punctuation
+ input_clean <- tm::removePunctuation(num_con)
+
+ # split the sentence into individual words
+ input_split <- c(unlist(strsplit(input_clean, split = " ")))
+
+ # loop every word in the text through the is.palindrome() function
+ # and return their boolean values
+ check_palin <- c()
+ for (i in input_split) {
+ result <- is.palindrome(i)
+ check_palin <- append(check_palin, result)
+ }
+
+ # check and return index positions of TRUE
+ indx <- which(check_palin == TRUE)
+
+ # use the index positions to filter input_split
+ palindromes <- input_split[indx]
+
+    # keep only palindromic numbers longer than one digit
+ palin <- palindromes[nchar(palindromes) > 1]
+
+ return(noquote(palin))
+
+ }
+}
+
+my_text <- "Bob works in a shop on a street called Pop. His work ID is 444, and his manager owns a racecar."
+
+findPalindrome(my_text)
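+# Returns the palindromic words: Bob Pop 444 racecar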
diff --git a/Desktop/open-source/R/string_manipulation/hamming_distance.r b/Desktop/open-source/R/string_manipulation/hamming_distance.r
new file mode 100644
index 00000000..03bb8225
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/hamming_distance.r
@@ -0,0 +1,30 @@
+# Hamming distance
+library(roxygen2)
+library(docstring)
+
+
+hamming_distance <- function(input1, input2) {
+ #' Find the hamming distance between two strings
+ #'
+ #' @description Finds the hamming distance between two strings
+  #' @param input1 A character vector of single characters (e.g. strsplit("karolin", "")[[1]])
+  #' @param input2 A character vector of the same length as input1
+ #' @usage hamming_distance(input1, input2)
+ #' @details In information theory, the Hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different.
+ #' In other words, it measures the minimum number of substitutions required to change one string into the other, or the minimum number of errors that could have transformed one string into the other.
+ #' In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences. It is named after the American mathematician Richard Hamming.
+ #' @references https://en.wikipedia.org/wiki/Hamming_distance
+
+ if (length(input1) != length(input2)) stop("String lengths must be the same")
+
+ sum(input1 != input2)
+}
+
+
+x1 = strsplit("karolin", "")[[1]]
+y1 = strsplit("kathrin", "")[[1]]
+print(hamming_distance(x1, y1) == 3) # returns TRUE
+
+x2 = strsplit("0000", "")[[1]]
+y2 = strsplit("1111", "")[[1]]
+print(hamming_distance(x2, y2) == 4) # returns TRUE
diff --git a/Desktop/open-source/R/string_manipulation/is.anagram.r b/Desktop/open-source/R/string_manipulation/is.anagram.r
new file mode 100644
index 00000000..84196674
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/is.anagram.r
@@ -0,0 +1,22 @@
+is.anagram <- function(word1, word2) {
+ # Convert words to lowercase
+ word1 <- tolower(word1)
+ word2 <- tolower(word2)
+
+ # Check if the words have the same length
+ if (nchar(word1) != nchar(word2)) {
+ return(FALSE)
+ }
+
+ # Check if the sorted characters of the words are the same
+ sorted_word1 <- sort(strsplit(word1, "")[[1]])
+ sorted_word2 <- sort(strsplit(word2, "")[[1]])
+
+  return(identical(sorted_word1, sorted_word2))
+}
+
+is.anagram(word1 = "rats",word2 = "star")
diff --git a/Desktop/open-source/R/string_manipulation/is.lower.r b/Desktop/open-source/R/string_manipulation/is.lower.r
new file mode 100644
index 00000000..c48699ac
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/is.lower.r
@@ -0,0 +1,12 @@
+is.lowercase <- function(string) {
+ # split the string at character level
+ string_split <- c(unlist(strsplit(string, split = "")))
+ # check if the split string exactly matches its lowercase version
+ check_case <- string_split == tolower(string_split)
+ # return a boolean value based on the outcome of the check
+ return(all(check_case))
+}
+
+is.lowercase("social")
+
+
diff --git a/Desktop/open-source/R/string_manipulation/is.uppercase.r b/Desktop/open-source/R/string_manipulation/is.uppercase.r
new file mode 100644
index 00000000..acc8926d
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/is.uppercase.r
@@ -0,0 +1,10 @@
+is.uppercase <- function(string) {
+ # split the string at character level
+ string_split <- c(unlist(strsplit(string, split = "")))
+ # check if the split string exactly matches its uppercase version
+ check_case <- string_split == toupper(string_split)
+ # return a boolean value based on the outcome of the check
+ return(all(check_case))
+}
+
+is.uppercase("BUSINESS")
diff --git a/Desktop/open-source/R/string_manipulation/kmp_string_matching.r b/Desktop/open-source/R/string_manipulation/kmp_string_matching.r
new file mode 100644
index 00000000..bde4bb07
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/kmp_string_matching.r
@@ -0,0 +1,343 @@
+# Knuth-Morris-Pratt (KMP) String Matching Algorithm
+#
+# The KMP algorithm efficiently finds all occurrences of a pattern in a text
+# by preprocessing the pattern to avoid unnecessary character comparisons.
+# It uses a "failure function" (also called "prefix function") to skip characters
+# intelligently when a mismatch occurs.
+#
+# Time Complexity: O(n + m) where n = text length, m = pattern length
+# Space Complexity: O(m) for the failure function array
+#
+# This is a significant improvement over naive string matching O(n*m)
+#
+# Applications:
+# - Text editors (find/replace functionality)
+# - DNA sequence analysis in bioinformatics
+# - Plagiarism detection systems
+# - Web search engines
+
+# Function to compute the failure function (prefix function)
+compute_failure_function <- function(pattern) {
+ #' Compute the failure function for KMP algorithm
+ #' @param pattern: Pattern string to preprocess
+ #' @return: Vector of failure function values
+
+ m <- nchar(pattern)
+ failure <- rep(0, m)
+
+ # failure[1] is always 0 (single character has no proper prefix/suffix)
+ if (m == 0) return(integer(0))
+ if (m == 1) return(0)
+
+ j <- 0 # Length of previous longest prefix suffix
+
+ for (i in 2:m) {
+ # Get current character
+ curr_char <- substr(pattern, i, i)
+
+ # Handle mismatches by following failure links
+ while (j > 0 && substr(pattern, j + 1, j + 1) != curr_char) {
+ j <- failure[j]
+ }
+
+ # If characters match, increment j
+ if (substr(pattern, j + 1, j + 1) == curr_char) {
+ j <- j + 1
+ }
+
+ failure[i] <- j
+ }
+
+ return(failure)
+}
+
+# Main KMP string matching algorithm
+kmp_search <- function(text, pattern) {
+ #' Find all occurrences of pattern in text using KMP algorithm
+ #' @param text: Text string to search in
+ #' @param pattern: Pattern string to search for
+ #' @return: Vector of starting positions (1-indexed) where pattern occurs
+
+ n <- nchar(text)
+ m <- nchar(pattern)
+
+ # Handle edge cases
+ if (m == 0) return(integer(0))
+ if (n == 0 || m > n) return(integer(0))
+
+ # Precompute failure function
+ failure <- compute_failure_function(pattern)
+
+ matches <- c()
+ j <- 0 # Index for pattern
+
+ for (i in 1:n) {
+ # Get current character from text
+ curr_char <- substr(text, i, i)
+
+ # Handle mismatches using failure function
+ while (j > 0 && substr(pattern, j + 1, j + 1) != curr_char) {
+ j <- failure[j]
+ }
+
+ # If characters match, advance pattern index
+ if (substr(pattern, j + 1, j + 1) == curr_char) {
+ j <- j + 1
+ }
+
+ # Check for complete pattern match
+ if (j == m) {
+ matches <- c(matches, i - m + 1) # 1-indexed position
+ j <- failure[j] # Prepare for next potential match
+ }
+ }
+
+ return(matches)
+}
+
+# Function to find first occurrence only (more efficient)
+kmp_search_first <- function(text, pattern) {
+ #' Find first occurrence of pattern in text using KMP algorithm
+ #' @param text: Text string to search in
+ #' @param pattern: Pattern string to search for
+ #' @return: Starting position (1-indexed) of first match, or -1 if not found
+
+ n <- nchar(text)
+ m <- nchar(pattern)
+
+ # Handle edge cases
+ if (m == 0) return(-1)
+ if (n == 0 || m > n) return(-1)
+
+ # Precompute failure function
+ failure <- compute_failure_function(pattern)
+
+ j <- 0 # Index for pattern
+
+ for (i in 1:n) {
+ # Get current character from text
+ curr_char <- substr(text, i, i)
+
+ # Handle mismatches using failure function
+ while (j > 0 && substr(pattern, j + 1, j + 1) != curr_char) {
+ j <- failure[j]
+ }
+
+ # If characters match, advance pattern index
+ if (substr(pattern, j + 1, j + 1) == curr_char) {
+ j <- j + 1
+ }
+
+ # Check for complete pattern match
+ if (j == m) {
+ return(i - m + 1) # Return 1-indexed position
+ }
+ }
+
+ return(-1) # Pattern not found
+}
+
+# Function to count occurrences without storing positions
+kmp_count <- function(text, pattern) {
+ #' Count occurrences of pattern in text using KMP algorithm
+ #' @param text: Text string to search in
+ #' @param pattern: Pattern string to search for
+ #' @return: Number of occurrences
+
+ n <- nchar(text)
+ m <- nchar(pattern)
+
+ # Handle edge cases
+ if (m == 0) return(0)
+ if (n == 0 || m > n) return(0)
+
+ # Precompute failure function
+ failure <- compute_failure_function(pattern)
+
+ count <- 0
+ j <- 0 # Index for pattern
+
+ for (i in 1:n) {
+ # Get current character from text
+ curr_char <- substr(text, i, i)
+
+ # Handle mismatches using failure function
+ while (j > 0 && substr(pattern, j + 1, j + 1) != curr_char) {
+ j <- failure[j]
+ }
+
+ # If characters match, advance pattern index
+ if (substr(pattern, j + 1, j + 1) == curr_char) {
+ j <- j + 1
+ }
+
+ # Check for complete pattern match
+ if (j == m) {
+ count <- count + 1
+ j <- failure[j] # Prepare for next potential match
+ }
+ }
+
+ return(count)
+}
+
+# Naive string matching for comparison
+naive_search <- function(text, pattern) {
+ #' Naive string matching algorithm for performance comparison
+ #' @param text: Text string to search in
+ #' @param pattern: Pattern string to search for
+ #' @return: Vector of starting positions where pattern occurs
+
+ n <- nchar(text)
+ m <- nchar(pattern)
+ matches <- c()
+
+ if (m == 0 || n == 0 || m > n) return(matches)
+
+ for (i in 1:(n - m + 1)) {
+ if (substr(text, i, i + m - 1) == pattern) {
+ matches <- c(matches, i)
+ }
+ }
+
+ return(matches)
+}
+
+# Function to visualize the failure function
+visualize_failure_function <- function(pattern) {
+ #' Print a visual representation of the failure function
+ #' @param pattern: Pattern to analyze
+
+ failure <- compute_failure_function(pattern)
+ m <- nchar(pattern)
+
+ cat("Pattern: ")
+ for (i in 1:m) {
+ cat(sprintf("%2s ", substr(pattern, i, i)))
+ }
+ cat("\n")
+
+ cat("Index: ")
+ for (i in 1:m) {
+ cat(sprintf("%2d ", i))
+ }
+ cat("\n")
+
+ cat("Failure: ")
+ for (i in 1:m) {
+ cat(sprintf("%2d ", failure[i]))
+ }
+ cat("\n\n")
+}
+
+# Example usage and testing
+cat("=== Knuth-Morris-Pratt (KMP) String Matching Algorithm ===\n\n")
+
+# Test 1: Basic pattern matching
+cat("1. Basic Pattern Matching\n")
+text1 <- "ABABDABACDABABCABCABCABCABC"
+pattern1 <- "ABC"
+
+cat("Text: ", text1, "\n")
+cat("Pattern: ", pattern1, "\n")
+
+matches1 <- kmp_search(text1, pattern1)
+cat("KMP matches at positions:", paste(matches1, collapse = ", "), "\n")
+
+# Verify with naive approach
+naive_matches1 <- naive_search(text1, pattern1)
+cat("Naive matches at positions:", paste(naive_matches1, collapse = ", "), "\n")
+cat("Results match:", identical(matches1, naive_matches1), "\n\n")
+
+# Test 2: Pattern with repeating characters
+cat("2. Pattern with Repeating Characters\n")
+text2 <- "AABAACAADAABAABA"
+pattern2 <- "AABA"
+
+cat("Text: ", text2, "\n")
+cat("Pattern: ", pattern2, "\n")
+
+visualize_failure_function(pattern2)
+
+matches2 <- kmp_search(text2, pattern2)
+cat("Matches at positions:", paste(matches2, collapse = ", "), "\n")
+
+# Show the actual matches
+for (pos in matches2) {
+ match_str <- substr(text2, pos, pos + nchar(pattern2) - 1)
+ cat("Position", pos, ":", match_str, "\n")
+}
+cat("\n")
+
+# Test 3: Edge cases
+cat("3. Edge Cases\n")
+cat("Empty pattern:", length(kmp_search("hello", "")), "matches\n")
+cat("Empty text:", length(kmp_search("", "hello")), "matches\n")
+cat("Pattern longer than text:", length(kmp_search("hi", "hello")), "matches\n")
+cat("Single character pattern:", paste(kmp_search("abcabc", "a"), collapse = ", "), "\n")
+cat("Pattern not in text:", paste(kmp_search("hello world", "xyz"), collapse = ", "), "\n")
+cat("Identical strings:", paste(kmp_search("hello", "hello"), collapse = ", "), "\n")
+cat("Pattern at start:", paste(kmp_search("hello world", "hello"), collapse = ", "), "\n")
+cat("Pattern at end:", paste(kmp_search("world hello", "hello"), collapse = ", "), "\n\n")
+
+# Test 4: Performance comparison
+cat("4. Performance Comparison\n")
+# Create a text with many potential false matches
+repeated_text <- paste(rep("AAAAB", 200), collapse = "")
+repeated_pattern <- "AAAAB"
+
+cat("Text length:", nchar(repeated_text), "\n")
+cat("Pattern:", repeated_pattern, "\n")
+
+# Time the KMP algorithm
+start_time <- Sys.time()
+kmp_result <- kmp_search(repeated_text, repeated_pattern)
+kmp_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+# Time the naive algorithm
+start_time <- Sys.time()
+naive_result <- naive_search(repeated_text, repeated_pattern)
+naive_time <- as.numeric(Sys.time() - start_time, units = "secs")
+
+cat("KMP found", length(kmp_result), "matches in", sprintf("%.6f", kmp_time), "seconds\n")
+cat("Naive found", length(naive_result), "matches in", sprintf("%.6f", naive_time), "seconds\n")
+
+if (naive_time > 0 && kmp_time > 0) {
+ speedup <- naive_time / kmp_time
+ cat("KMP speedup:", sprintf("%.2f", speedup), "x faster\n")
+}
+cat("\n")
+
+# Test 5: Real-world example - DNA sequence matching
+cat("5. DNA Sequence Matching Example\n")
+dna_sequence <- "ATCGATCGATCGAATCGATCGATCGAATCGATCG"
+dna_pattern <- "ATCG"
+
+cat("DNA Sequence:", dna_sequence, "\n")
+cat("Pattern: ", dna_pattern, "\n")
+
+dna_matches <- kmp_search(dna_sequence, dna_pattern)
+cat("Pattern occurs at positions:", paste(dna_matches, collapse = ", "), "\n")
+cat("Total occurrences:", length(dna_matches), "\n\n")
+
+# Test 6: Failure function examples
+cat("6. Failure Function Examples\n")
+patterns <- c("ABCABCAB", "AAAA", "ABCDE", "ABABABAB")
+
+for (pattern in patterns) {
+ cat("Pattern:", pattern, "\n")
+ visualize_failure_function(pattern)
+}
+
+# Test 7: Case sensitivity
+cat("7. Case Sensitivity\n")
+text_case <- "Hello World Hello"
+pattern_case <- "hello"
+matches_case <- kmp_search(text_case, pattern_case)
+cat("Text:", text_case, "\n")
+cat("Pattern:", pattern_case, "\n")
+cat("Matches (case-sensitive):", paste(matches_case, collapse = ", "), "\n")
+
+# Case-insensitive version
+matches_insensitive <- kmp_search(tolower(text_case), tolower(pattern_case))
+cat("Matches (case-insensitive):", paste(matches_insensitive, collapse = ", "), "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/string_manipulation/levenshtein.r b/Desktop/open-source/R/string_manipulation/levenshtein.r
new file mode 100644
index 00000000..73ac2bd3
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/levenshtein.r
@@ -0,0 +1,38 @@
+# Levenshtein Distance in R
+# Calculates minimum number of edits (insert, delete, replace)
+# to convert one string into another and prints the distance.
+
+levenshtein_distance <- function(s1, s2) {
+  n <- nchar(s1)
+  m <- nchar(s2)
+
+  # Edge cases: if either string is empty, the distance is the other's length
+  if (n == 0) return(m)
+  if (m == 0) return(n)
+
+ # Initialize DP matrix
+ dp <- matrix(0, nrow = n + 1, ncol = m + 1)
+
+ for (i in 0:n) dp[i + 1, 1] <- i
+ for (j in 0:m) dp[1, j + 1] <- j
+
+ # Fill DP table
+ for (i in 1:n) {
+ for (j in 1:m) {
+ if (substr(s1, i, i) == substr(s2, j, j)) {
+ dp[i + 1, j + 1] <- dp[i, j]
+ } else {
+ dp[i + 1, j + 1] <- min(
+ dp[i, j + 1] + 1, # Deletion
+ dp[i + 1, j] + 1, # Insertion
+ dp[i, j] + 1 # Substitution
+ )
+ }
+ }
+ }
+
+ return(dp[n + 1, m + 1])
+}
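+
+# Example (non-interactive):
+# levenshtein_distance("kitten", "sitting")  # 3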
+
+# Interactive input
+s1 <- tolower(readline("Enter first string: "))
+s2 <- tolower(readline("Enter second string: "))
+
+distance <- levenshtein_distance(s1, s2)
+cat("Levenshtein distance between '", s1, "' and '", s2, "' is:", distance, "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/string_manipulation/longest.palindromic.subsequence.r b/Desktop/open-source/R/string_manipulation/longest.palindromic.subsequence.r
new file mode 100644
index 00000000..7ee8236d
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/longest.palindromic.subsequence.r
@@ -0,0 +1,62 @@
+# Longest Palindromic Subsequence in R
+# Author: sgindeed
+# Description: Finds and prints the longest palindromic subsequence and its length
+
+# Ask for user input
+input.string <- readline(prompt = "Enter a string: ")
+
+# Convert string to lowercase for consistency
+clean.string <- tolower(input.string)
+
+# Get length of string
+n <- nchar(clean.string)
+
+# Split string into characters
+chars <- strsplit(clean.string, "")[[1]]
+
+# Initialize DP table for lengths
+dp <- matrix(0, nrow = n, ncol = n)
+
+# Each single character is a palindrome of length 1
+for (i in seq_len(n)) {
+ dp[i, i] <- 1
+}
+
+# Fill the DP table (only needed when the string has more than one character;
+# for n <= 1 the bounds 2:n would run backwards and index out of range)
+if (n > 1) {
+  for (cl in 2:n) {
+    for (i in 1:(n - cl + 1)) {
+      j <- i + cl - 1
+      if (chars[i] == chars[j] && cl == 2) {
+        dp[i, j] <- 2
+      } else if (chars[i] == chars[j]) {
+        dp[i, j] <- dp[i + 1, j - 1] + 2
+      } else {
+        dp[i, j] <- max(dp[i + 1, j], dp[i, j - 1])
+      }
+    }
+  }
+}
+
+# Function to reconstruct the subsequence
+reconstructLPS <- function(chars, dp, i, j) {
+ if (i > j) {
+ return("")
+ }
+ if (i == j) {
+ return(chars[i])
+ }
+ if (chars[i] == chars[j]) {
+ return(paste0(chars[i], reconstructLPS(chars, dp, i + 1, j - 1), chars[j]))
+ }
+ if (dp[i + 1, j] > dp[i, j - 1]) {
+ return(reconstructLPS(chars, dp, i + 1, j))
+ } else {
+ return(reconstructLPS(chars, dp, i, j - 1))
+ }
+}
+
+# Get the longest palindromic subsequence
+lps <- reconstructLPS(chars, dp, 1, n)
+
+# Display the result
+cat("Longest Palindromic Subsequence:", lps, "\n")
+cat("Length:", nchar(lps), "\n")
diff --git a/Desktop/open-source/R/string_manipulation/longest.substring.no.repeat.r b/Desktop/open-source/R/string_manipulation/longest.substring.no.repeat.r
new file mode 100644
index 00000000..b4898ba4
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/longest.substring.no.repeat.r
@@ -0,0 +1,39 @@
+# Longest Substring Without Repeating Characters in R
+# Author: sgindeed
+# Description: Finds the length of the longest substring without repeating characters
+
+# Ask for user input
+input.string <- readline(prompt = "Enter a string: ")
+
+# Convert the string to lowercase for case-insensitive processing (optional)
+clean.string <- tolower(input.string)
+
+# Split string into characters
+chars <- strsplit(clean.string, "")[[1]]
+
+# Initialize variables
+hash.table <- list() # stores last index of characters
+max.length <- 0
+start <- 1 # start of current window
+
+# Iterate over characters
+for (i in seq_along(chars)) {
+ char <- chars[i]
+
+ # If character was seen before and is inside current window
+ if (!is.null(hash.table[[char]]) && hash.table[[char]] >= start) {
+ start <- hash.table[[char]] + 1 # move start to one after previous occurrence
+ }
+
+ # Update last seen index of the character
+ hash.table[[char]] <- i
+
+ # Update max length
+ current.length <- i - start + 1
+ if (current.length > max.length) {
+ max.length <- current.length
+ }
+}
+
+# Display the result
+cat("Length of the longest substring without repeating characters:", max.length, "\n")
diff --git a/Desktop/open-source/R/string_manipulation/manacher.longest.palindrome.r b/Desktop/open-source/R/string_manipulation/manacher.longest.palindrome.r
new file mode 100644
index 00000000..29b1bc69
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/manacher.longest.palindrome.r
@@ -0,0 +1,34 @@
+# Manacher's Algorithm – Longest Palindromic Substring in R
+
+longest_palindrome <- function(s) {
+ T <- paste0("^#", paste(unlist(strsplit(s, "")), collapse = "#"), "#$")
+ n <- nchar(T)
+ P <- integer(n)
+ center <- 0
+ right <- 0
+
+ for (i in 2:(n - 1)) {
+ mirror <- 2 * center - i
+ if (i < right)
+ P[i] <- min(right - i, P[mirror])
+
+ while (substr(T, i + (1 + P[i]), i + (1 + P[i])) ==
+ substr(T, i - (1 + P[i]), i - (1 + P[i])))
+ P[i] <- P[i] + 1
+
+ if (i + P[i] > right) {
+ center <- i
+ right <- i + P[i]
+ }
+ }
+
+  max_len <- max(P)
+  center_index <- which.max(P)
+  # Map the best center in the transformed string back to a 0-based start in s
+  # (center_index is 1-based, hence the extra -1; integer division avoids fractions)
+  start <- (center_index - 1 - max_len) %/% 2
+  palindrome <- substr(s, start + 1, start + max_len)
+ return(palindrome)
+}
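+
+# Example (non-interactive):
+# longest_palindrome("babad")  # "bab" ("aba" is an equally valid answer)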
+
+s <- tolower(readline("Enter a string: "))
+result <- longest_palindrome(s)
+cat("Longest Palindromic Substring:", result, "\n")
\ No newline at end of file
diff --git a/Desktop/open-source/R/string_manipulation/maskwords.r b/Desktop/open-source/R/string_manipulation/maskwords.r
new file mode 100644
index 00000000..24488def
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/maskwords.r
@@ -0,0 +1,35 @@
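+# maskWords: replaces the hard-coded words "birds", "market" and "street"
+# (in any letter case) in a text with the supplied mask string.
+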
+maskWords <- function(text, mask) {
+ text_split <- c(unlist(strsplit(text, split = " ")))
+
+ post_n <- c()
+ for (i in text_split) {
+ post_n <- c(
+ post_n,
+ if (i %in% c(
+ "birds",
+ "BIRDS",
+ "Birds",
+ "market",
+ "Market",
+ "MARKET",
+ "street",
+ "STREET",
+ "Street"
+ )) {
+ tolower(i)
+ } else {
+ i
+ }
+ )
+ }
+
+ clean_text <- gsub("\\b(birds|street|market)\\b", mask, post_n)
+
+ clean_text <- gsub("\n", "", clean_text)
+
+ return(paste(clean_text, collapse = " "))
+}
+
+post <- "The lady bought groceries from the market, but some of them spilled on the street, and the birds helped themselves."
+
+maskWords(text = post, mask = "$$$")
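+# Expected result: "The lady bought groceries from the $$$, but some of them
+# spilled on the $$$, and the $$$ helped themselves."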
diff --git a/Desktop/open-source/R/string_manipulation/min.palindromic.insert.r b/Desktop/open-source/R/string_manipulation/min.palindromic.insert.r
new file mode 100644
index 00000000..8afdbbd9
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/min.palindromic.insert.r
@@ -0,0 +1,72 @@
+#' Computes the minimum number of characters to insert to make a string a palindrome.
+#'
+#' @param s A character string (case-insensitive).
+#' @return A list containing:
+#' \item{min_insertions}{Minimum number of insertions required.}
+#' \item{palindrome}{One possible resulting palindrome.}
+#' @details
+#' Uses dynamic programming to compute the minimum insertions in O(n^2) time and space.
+#' Also reconstructs one possible palindrome.
+#'
+#' @examples
+#' min_palindrome_insertion("race")
+#' # Returns 3 insertions and palindrome "ecarace"
+# Minimum Palindrome Insertion in R
+# Computes the minimum number of characters to insert to make a string a palindrome
+# Also prints one possible resulting palindrome.
+
+min_palindrome_insertion <- function(s) {
+ n <- nchar(s)
+ chars <- strsplit(s, "")[[1]]
+
+ # DP table to store minimum insertions for substring s[i..j]
+ dp <- matrix(0, nrow = n, ncol = n)
+
+  # Fill the table (only needed for strings with more than one character;
+  # for n <= 1 the bounds 2:n would run backwards and index out of range)
+  if (n > 1) {
+    for (length_sub in 2:n) {
+      for (i in 1:(n - length_sub + 1)) {
+        j <- i + length_sub - 1
+        if (chars[i] == chars[j]) {
+          dp[i, j] <- dp[i + 1, j - 1]
+        } else {
+          dp[i, j] <- min(dp[i + 1, j], dp[i, j - 1]) + 1
+        }
+      }
+    }
+  }
+
+ # Reconstruct one possible palindrome
+ i <- 1
+ j <- n
+ length_sub <- dp[1, n] + n
+ res <- character(length_sub)
+ left <- 1
+ right <- length(res)
+
+ while (i <= j) {
+ if (chars[i] == chars[j]) {
+ res[left] <- chars[i]
+ res[right] <- chars[j]
+ i <- i + 1
+ j <- j - 1
+ } else if (dp[i + 1, j] < dp[i, j - 1]) {
+ res[left] <- chars[i]
+ res[right] <- chars[i]
+ i <- i + 1
+ } else {
+ res[left] <- chars[j]
+ res[right] <- chars[j]
+ j <- j - 1
+ }
+ left <- left + 1
+ right <- right - 1
+ }
+
+ list(min_insertions = dp[1, n], palindrome = paste(res, collapse = ""))
+}
+
+# --- Interactive input ---
+s <- tolower(readline("Enter a string: "))
+
+result <- min_palindrome_insertion(s)
+cat("Minimum insertions required:", result$min_insertions, "\n")
+cat("One possible resulting palindrome:", result$palindrome, "\n")
diff --git a/Desktop/open-source/R/string_manipulation/minimum.window.substring.r b/Desktop/open-source/R/string_manipulation/minimum.window.substring.r
new file mode 100644
index 00000000..c220ecc3
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/minimum.window.substring.r
@@ -0,0 +1,66 @@
+# Minimum Window Substring in R
+# Author: sgindeed
+# Description: Finds the smallest substring of s that contains all characters of t
+
+# Ask for inputs
+s <- readline(prompt = "Enter main string: ")
+t <- readline(prompt = "Enter target characters: ")
+
+# Convert to lowercase for case-insensitivity
+s <- tolower(s)
+t <- tolower(t)
+
+# Edge case
+if (nchar(s) == 0 || nchar(t) == 0) {
+ cat("Empty input. Exiting.\n")
+ quit(save = "no")
+}
+
+# Convert to char arrays
+s_chars <- strsplit(s, "")[[1]]
+t_chars <- strsplit(t, "")[[1]]
+
+# Frequency of characters in t
+t_count <- table(t_chars)
+window_count <- list()
+
+# Null-coalescing helper: base R only provides %||% from version 4.4.0,
+# so define it here for portability
+`%||%` <- function(x, y) if (is.null(x)) y else x
+
+required <- length(t_count)
+formed <- 0
+
+left <- 1
+right <- 0
+min_len <- Inf
+min_window <- ""
+
+# Sliding window
+while (right < length(s_chars)) {
+ right <- right + 1
+ char <- s_chars[right]
+ window_count[[char]] <- (window_count[[char]] %||% 0) + 1
+
+ if (!is.na(t_count[char]) && window_count[[char]] == t_count[char]) {
+ formed <- formed + 1
+ }
+
+ # Try to contract the window
+ while (left <= right && formed == required) {
+ if ((right - left + 1) < min_len) {
+ min_len <- right - left + 1
+ min_window <- paste0(s_chars[left:right], collapse = "")
+ }
+
+ left_char <- s_chars[left]
+ window_count[[left_char]] <- window_count[[left_char]] - 1
+ if (!is.na(t_count[left_char]) && window_count[[left_char]] < t_count[left_char]) {
+ formed <- formed - 1
+ }
+ left <- left + 1
+ }
+}
+
+if (is.infinite(min_len)) {
+ cat("No valid window found.\n")
+} else {
+ cat("Minimum window substring:", min_window, "\n")
+ cat("Length:", min_len, "\n")
+}
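+
+# Example: for s = "ADOBECODEBANC" and t = "ABC" (lowercased by the script),
+# the minimum window substring is "banc" with length 4.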
diff --git a/Desktop/open-source/R/string_manipulation/rearrangeways.r b/Desktop/open-source/R/string_manipulation/rearrangeways.r
new file mode 100644
index 00000000..c2e1636c
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/rearrangeways.r
@@ -0,0 +1,33 @@
+rearrangeWays <- function(string, as_report = TRUE) {
+  # split the string into letters
+  string_split <- c(unlist(strsplit(string, split = "")))
+
+  # number of ways to arrange the letter positions
+  # (repeated letters are treated as distinct, so "straight" gives 8! = 40320)
+  possible_ways <- factorial(length(string_split))
+
+  if (as_report) {
+    # create the answer text
+    answer <- paste(string, "can be rearranged in", possible_ways, "possible ways")
+    return(noquote(answer))
+  } else {
+    return(possible_ways)
+  }
+}
+
+rearrangeWays(string = "straight")
diff --git a/Desktop/open-source/R/string_manipulation/shortest.common.supersequence.r b/Desktop/open-source/R/string_manipulation/shortest.common.supersequence.r
new file mode 100644
index 00000000..712a0aa7
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/shortest.common.supersequence.r
@@ -0,0 +1,57 @@
+# Shortest Common Supersequence (SCS) in R
+# Finds the shortest string that contains both input strings as subsequences.
+
+shortest_common_supersequence <- function(X, Y) {
+  m <- nchar(X)
+  n <- nchar(Y)
+
+  # Edge cases: if either string is empty, the SCS is just the other string
+  if (m == 0) return(list(length = n, supersequence = Y))
+  if (n == 0) return(list(length = m, supersequence = X))
+
+ # Initialize DP table for Longest Common Subsequence (LCS)
+ dp <- matrix(0, nrow = m + 1, ncol = n + 1)
+
+ for (i in 1:m) {
+ for (j in 1:n) {
+ if (substr(X, i, i) == substr(Y, j, j)) {
+ dp[i + 1, j + 1] <- dp[i, j] + 1
+ } else {
+ dp[i + 1, j + 1] <- max(dp[i, j + 1], dp[i + 1, j])
+ }
+ }
+ }
+
+ # Backtrack to build the SCS
+ i <- m; j <- n
+ scs <- ""
+ while (i > 0 && j > 0) {
+ if (substr(X, i, i) == substr(Y, j, j)) {
+ scs <- paste0(substr(X, i, i), scs)
+ i <- i - 1
+ j <- j - 1
+ } else if (dp[i, j + 1] > dp[i + 1, j]) {
+ scs <- paste0(substr(X, i, i), scs)
+ i <- i - 1
+ } else {
+ scs <- paste0(substr(Y, j, j), scs)
+ j <- j - 1
+ }
+ }
+
+ # Add remaining characters from X or Y
+ while (i > 0) {
+ scs <- paste0(substr(X, i, i), scs)
+ i <- i - 1
+ }
+ while (j > 0) {
+ scs <- paste0(substr(Y, j, j), scs)
+ j <- j - 1
+ }
+
+ return(list(length = nchar(scs), supersequence = scs))
+}
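+
+# Example (non-interactive):
+# shortest_common_supersequence("abac", "cab")
+# gives length 5 and supersequence "cabac"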
+
+# Interactive input
+X <- tolower(readline("Enter first string: "))
+Y <- tolower(readline("Enter second string: "))
+
+result <- shortest_common_supersequence(X, Y)
+cat("Length of SCS:", result$length, "\n")
+cat("Shortest Common Supersequence:", result$supersequence, "\n")
diff --git a/Desktop/open-source/R/string_manipulation/unique.letters.count.r b/Desktop/open-source/R/string_manipulation/unique.letters.count.r
new file mode 100644
index 00000000..8083ec3f
--- /dev/null
+++ b/Desktop/open-source/R/string_manipulation/unique.letters.count.r
@@ -0,0 +1,21 @@
+# Ask for user input
+input.string <- readline(prompt = "Enter a string: ")
+
+# Keep only letters (the pattern [^a-zA-Z] drops every other character),
+# then convert the result to lowercase
+clean.string <- tolower(gsub("[^a-zA-Z]", "", input.string))
+
+# Split string into individual letters
+letters.vec <- strsplit(clean.string, "")[[1]]
+
+# Get unique letters
+unique.letters <- unique(letters.vec)
+
+# Count occurrences of each unique letter
+letter.counts <- table(letters.vec)[unique.letters]
+
+# Display results
+cat("Unique letters and their counts:\n")
+for (letter in unique.letters) {
+ cat(letter, ":", letter.counts[letter], "\n")
+}
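+
+# Example: for the input "Hello, World!" the printed counts are
+# h : 1, e : 1, l : 3, o : 2, w : 1, r : 1, d : 1 (one per line)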