# Patch summary (descriptive preamble; this text precedes the first `diff --git` header and is
# not part of any hunk).
#
# WORKSPACE: appends an http_archive named "boost" for Boost 1.83.0. The URL and strip_prefix are
#   derived from BOOST_VERSION, a sha256 is pinned, and build_file is "//external:boost.BUILD".
# external/boost.BUILD (new file): declares cc_library "boost_headers" (globs boost/**/*.hpp,
#   includes = ["."]) and cc_library "dynamic_bitset", which only depends on :boost_headers.
# src/BUILD: adds "@boost//:dynamic_bitset" to the deps of one cc_library.
# src/tesseract.cc: adds two #include lines and a std::hash specialization whose operator()
#   returns boost::hash_value(bs); removes the hand-written VectorCharHash; changes
#   initial_detectors, next_detectors, detectors, the visited_detectors map and the `detectors`
#   parameter of flip_detectors_and_block_errors to use boost::dynamic_bitset<>; and changes
#   several range-for loop variables from size_t to int.
# src/tesseract.h: adds one #include and updates the flip_detectors_and_block_errors declaration
#   to take boost::dynamic_bitset<>& detectors.
#
# NOTE(review): in this copy the text inside angle brackets appears to be missing (bare `#include`,
#   `std::vector&`, `struct hash> {`, `std::numeric_limits::max()`) and the hunks are collapsed onto
#   a few physical lines -- presumably an extraction artifact; verify against the original patch
#   before applying.
# NOTE(review): in flip_detectors_and_block_errors, `oei` becomes int while `ei` stays size_t, so
#   `oei == ei` compares signed with unsigned -- confirm this is intended.
# NOTE(review): the WORKSPACE hunk adds four blank lines and ends with
#   "\ No newline at end of file".
# NOTE(review): build_file = "//external:boost.BUILD" needs `external` to resolve as a package and
#   this patch adds no external/BUILD -- confirm; Bazel also uses //external for bind() targets.
diff --git a/WORKSPACE b/WORKSPACE index ded3a8f..aba6cd3 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -66,3 +66,22 @@ http_archive( urls = ["https://github.com/bazelbuild/platforms/archive/refs/tags/0.0.6.zip"], strip_prefix = "platforms-0.0.6", ) + + + + +BOOST_VERSION = "1.83.0" +BOOST_ARCHIVE_NAME = "boost_{}".format(BOOST_VERSION.replace(".", "_")) + +http_archive( + name = "boost", + urls = [ + "https://archives.boost.io/release/{}/source/{}.tar.gz".format( + BOOST_VERSION, + BOOST_ARCHIVE_NAME, + ) + ], + strip_prefix = BOOST_ARCHIVE_NAME, + sha256 = "c0685b68dd44cc46574cce86c4e17c0f611b15e195be9848dfd0769a0a207628", + build_file = "//external:boost.BUILD", +) \ No newline at end of file diff --git a/external/boost.BUILD b/external/boost.BUILD new file mode 100644 index 0000000..64678ae --- /dev/null +++ b/external/boost.BUILD @@ -0,0 +1,16 @@ +# external/boost.BUILD +package(default_visibility = ["//visibility:public"]) + +# A cc_library for the Boost headers themselves. +cc_library( + name = "boost_headers", + hdrs = glob(["boost/**/*.hpp"]), + includes = ["."], +) + +# A specific target for dynamic_bitset, which is header-only +# and depends on the main headers. 
+cc_library( + name = "dynamic_bitset", + deps = [":boost_headers"], +) diff --git a/src/BUILD b/src/BUILD index 5596f05..8da8e34 100644 --- a/src/BUILD +++ b/src/BUILD @@ -121,6 +121,7 @@ cc_library( linkopts = OPT_LINKOPTS, deps = [ ":libutils", + "@boost//:dynamic_bitset", ], ) diff --git a/src/tesseract.cc b/src/tesseract.cc index 0d9040b..cc94fcd 100644 --- a/src/tesseract.cc +++ b/src/tesseract.cc @@ -15,7 +15,9 @@ #include "tesseract.h" #include +#include // For boost::hash_range #include +#include // For std::hash (though not strictly necessary here, but good practice) #include namespace { @@ -37,6 +39,17 @@ std::ostream& operator<<(std::ostream& os, const std::vector& vec) { }; // namespace +namespace std { +template <> +struct hash> { + size_t operator()(const boost::dynamic_bitset<>& bs) const { + // Delegate to Boost's internal hash_value for dynamic_bitset + // This is the correct and most efficient way. + return boost::hash_value(bs); + } +}; +} // namespace std + std::string TesseractConfig::str() { auto& config = *this; std::stringstream ss; @@ -73,7 +86,7 @@ double TesseractDecoder::get_detcost( ErrorCost ec; DetectorCostTuple dct; - for (size_t ei : d2e[d]) { + for (int ei : d2e[d]) { ec = error_costs[ei]; if (ec.min_cost >= min_cost) break; @@ -89,17 +102,6 @@ double TesseractDecoder::get_detcost( return min_cost + config.det_penalty; } -struct VectorCharHash { - size_t operator()(const std::vector& v) const { - size_t seed = v.size(); - - for (char el : v) { - seed = seed * 31 + static_cast(el); - } - return seed; - } -}; - TesseractDecoder::TesseractDecoder(TesseractConfig config_) : config(config_) { config.dem = common::remove_zero_probability_errors(config.dem); if (config.det_orders.empty()) { @@ -206,7 +208,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections) } void TesseractDecoder::flip_detectors_and_block_errors( - size_t detector_order, const std::vector& errors, std::vector& detectors, + size_t detector_order, 
const std::vector& errors, boost::dynamic_bitset<>& detectors, std::vector& detector_cost_tuples) const { for (size_t ei : errors) { size_t min_detector = std::numeric_limits::max(); @@ -217,15 +219,15 @@ void TesseractDecoder::flip_detectors_and_block_errors( } } - for (size_t oei : d2e[min_detector]) { + for (int oei : d2e[min_detector]) { detector_cost_tuples[oei].error_blocked = 1; if (!config.at_most_two_errors_per_detector && oei == ei) break; } - for (size_t d : edets[ei]) { + for (int d : edets[ei]) { detectors[d] = !detectors[d]; if (!detectors[d] && config.at_most_two_errors_per_detector) { - for (size_t oei : d2e[d]) { + for (int oei : d2e[d]) { detector_cost_tuples[oei].error_blocked = 1; } } @@ -239,10 +241,9 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections, low_confidence_flag = false; std::priority_queue, std::greater> pq; - std::unordered_map, VectorCharHash>> - visited_detectors; + std::unordered_map>> visited_detectors; - std::vector initial_detectors(num_detectors, false); + boost::dynamic_bitset<> initial_detectors(num_detectors, false); std::vector initial_detector_cost_tuples(num_errors); for (size_t d : detections) { @@ -266,7 +267,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections, size_t max_num_detectors = min_num_detectors + detector_beam; std::vector next_errors; - std::vector next_detectors; + boost::dynamic_bitset<> next_detectors; std::vector next_detector_cost_tuples; pq.push({initial_cost, min_num_detectors, std::vector()}); @@ -278,7 +279,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections, if (node.num_detectors > max_num_detectors) continue; - std::vector detectors = initial_detectors; + boost::dynamic_bitset<> detectors = initial_detectors; std::vector detector_cost_tuples(num_errors); flip_detectors_and_block_errors(detector_order, node.errors, detectors, detector_cost_tuples); @@ -363,7 +364,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& 
detections, size_t prev_ei = std::numeric_limits::max(); std::vector detector_cost_cache(num_detectors, -1); - for (size_t ei : d2e[min_detector]) { + for (int ei : d2e[min_detector]) { if (detector_cost_tuples[ei].error_blocked) continue; if (prev_ei != std::numeric_limits::max()) { @@ -398,7 +399,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections, } if (!next_detectors[d] && config.at_most_two_errors_per_detector) { - for (size_t oei : d2e[d]) { + for (int oei : d2e[d]) { next_detector_cost_tuples[oei].error_blocked = next_detector_cost_tuples[oei].error_blocked == 1 ? 1 @@ -426,7 +427,7 @@ void TesseractDecoder::decode_to_errors(const std::vector& detections, } } - for (size_t od : eneighbors[ei]) { + for (int od : eneighbors[ei]) { if (!detectors[od] || !next_detectors[od]) continue; if (detector_cost_cache[od] == -1) { detector_cost_cache[od] = get_detcost(od, detector_cost_tuples); diff --git a/src/tesseract.h b/src/tesseract.h index 7528bed..aba7a50 100644 --- a/src/tesseract.h +++ b/src/tesseract.h @@ -15,6 +15,7 @@ #ifndef TESSERACT_DECODER_H #define TESSERACT_DECODER_H +#include #include #include #include @@ -101,7 +102,7 @@ struct TesseractDecoder { void initialize_structures(size_t num_detectors); double get_detcost(size_t d, const std::vector& detector_cost_tuples) const; void flip_detectors_and_block_errors(size_t detector_order, const std::vector& errors, - std::vector& detectors, + boost::dynamic_bitset<>& detectors, std::vector& detector_cost_tuples) const; };