1
1
-- | Parallel versions of 'filter' and 'simpleFilter'
2
2
3
3
module Text.Fuzzy.Parallel
4
- ( filter ,
5
- simpleFilter,
6
- match,
4
+ ( filter , filter',
5
+ simpleFilter, simpleFilter',
6
+ match, defChunkSize, defMaxResults,
7
7
Scored (.. )
8
8
) where
9
9
@@ -29,7 +29,6 @@ data Scored a = Scored {score :: !Int, original:: !a}
29
29
-- Just 5
30
30
--
31
31
{-# INLINABLE match #-}
32
-
33
32
match :: T. Text -- ^ Pattern in lowercase except for first character
34
33
-> T. Text -- ^ The text to search in.
35
34
-> Maybe Int -- ^ The score
@@ -70,22 +69,13 @@ match (T.Text pArr pOff pLen) (T.Text sArr sOff sLen) = go 0 1 pOff sOff
70
69
71
70
toLowerAscii w = if (w - 65 ) < 26 then w .|. 0x20 else w
72
71
73
- -- | The function to filter a list of values by fuzzy search on the text extracted from them.
74
- filter :: Int -- ^ Chunk size. 1000 works well.
75
- -> Int -- ^ Max. number of results wanted
76
- -> T. Text -- ^ Pattern.
77
- -> [t ] -- ^ The list of values containing the text to search in.
78
- -> (t -> T. Text ) -- ^ The function to extract the text from the container.
79
- -> [Scored t ] -- ^ The list of results, sorted, highest score first.
80
- filter chunkSize maxRes pattern ts extract = partialSortByAscScore maxRes perfectScore (concat vss)
81
- where
82
- -- Preserve case for the first character, make all others lowercase
83
- pattern' = case T. uncons pattern of
84
- Just (c, rest) -> T. cons c (T. toLower rest)
85
- _ -> pattern
86
- vss = map (mapMaybe (\ t -> flip Scored t <$> match pattern' (extract t))) (chunkList chunkSize ts)
87
- `using` parList (evalList rseq)
88
- perfectScore = fromMaybe (error $ T. unpack pattern ) $ match pattern' pattern'
72
+ -- | Sensible default for the chunk-size argument of 'simpleFilter' and the other filter functions in this module.
73
+ defChunkSize :: Int
74
+ defChunkSize = 1000
75
+
76
+ -- | Sensible default for the max-results argument of 'simpleFilter' and the other filter functions in this module.
77
+ defMaxResults :: Int
78
+ defMaxResults = 10
89
79
90
80
-- | Return all elements of the list that have a fuzzy
91
81
-- match against the pattern. Runs with default settings where
@@ -102,6 +92,52 @@ simpleFilter :: Int -- ^ Chunk size. 1000 works well.
102
92
simpleFilter chunk maxRes pattern xs =
103
93
filter chunk maxRes pattern xs id
104
94
95
+
96
+ -- | Filter a list of values by fuzzy search on the text extracted from each of them,
97
+ -- scoring every candidate with a caller-supplied matching function. The values are split into chunks that are scored in parallel.
98
+ filter' :: Int -- ^ Chunk size. 1000 works well.
99
+ -> Int -- ^ Max. number of results wanted.
100
+ -> T. Text -- ^ Pattern. Its first character keeps its case; the rest is lowercased before matching.
101
+ -> [t ] -- ^ The list of values containing the text to search in.
102
+ -> (t -> T. Text ) -- ^ The function to extract the text from the container.
103
+ -> (T. Text -> T. Text -> Maybe Int )
104
+ -- ^ Custom scoring function, applied to the normalised pattern and to each extracted text.
105
+ -- When it returns 'Nothing' the value is dropped from the results. NOTE: if it returns 'Nothing' for the normalised pattern against itself, evaluating the perfect score calls 'error'.
106
+ -> [Scored t ] -- ^ The list of results, sorted, highest score first.
107
+ filter' chunkSize maxRes pattern ts extract match' = partialSortByAscScore maxRes perfectScore (concat vss)
108
+ where
109
+ -- Normalise the pattern: preserve case for the first character, make all others lowercase.
110
+ pattern' = case T. uncons pattern of
111
+ Just (c, rest) -> T. cons c (T. toLower rest)
112
+ _ -> pattern
113
+ vss = map (mapMaybe (\ t -> flip Scored t <$> match' pattern' (extract t))) (chunkList chunkSize ts)
114
+ `using` parList (evalList rseq)
115
+ perfectScore = fromMaybe (error $ T. unpack pattern ) $ match' pattern' pattern'
116
+
117
+ -- | Filter a list of values by fuzzy search on the text extracted from each of them,
118
+ -- using the default 'match' function as the scoring function. Equivalent to 'filter'' called with 'match'.
119
+ filter :: Int -- ^ Chunk size. 1000 works well.
120
+ -> Int -- ^ Max. number of results wanted.
121
+ -> T. Text -- ^ Pattern.
122
+ -> [t ] -- ^ The list of values containing the text to search in.
123
+ -> (t -> T. Text ) -- ^ The function to extract the text from the container.
124
+ -> [Scored t ] -- ^ The list of results, sorted, highest score first.
125
+ filter chunkSize maxRes pattern ts extract =
126
+ filter' chunkSize maxRes pattern ts extract match
127
+
128
+ -- | Return the texts from the list that have a fuzzy match against the pattern.
129
+ -- The closeness of each match is determined by the custom scoring function that is passed in.
130
+ -- This is 'filter'' applied to a list of plain texts, with 'id' as the extraction function.
131
+ {-# INLINABLE simpleFilter' #-}
132
+ simpleFilter' :: Int -- ^ Chunk size. 1000 works well.
133
+ -> Int -- ^ Max. number of results wanted.
134
+ -> T. Text -- ^ Pattern to look for.
135
+ -> [T. Text ] -- ^ List of texts to check.
136
+ -> (T. Text -> T. Text -> Maybe Int )
137
+ -- ^ Custom scoring function to use for calculating how close words are.
138
+ -> [Scored T. Text ] -- ^ The texts that match, each paired with its score.
139
+ simpleFilter' chunk maxRes pattern xs match' =
140
+ filter' chunk maxRes pattern xs id match'
105
141
--------------------------------------------------------------------------------
106
142
107
143
chunkList :: Int -> [a ] -> [[a ]]
0 commit comments