1+ use std:: any:: Any ;
2+ use datafusion:: arrow:: array:: StringArray ;
3+ use datafusion:: arrow:: datatypes:: { DataType , DataType :: Utf8 } ;
4+ use datafusion:: common:: { exec_err, Result as DataFusionResult , ScalarValue } ;
5+ use datafusion:: logical_expr:: { ColumnarValue , ScalarFunctionArgs , ScalarUDFImpl , Signature , Volatility } ;
6+ use jsonpath_rust:: parser:: model:: { Segment , Selector } ;
7+ use jsonpath_rust:: parser:: parse_json_path;
8+ use crate :: common:: { invoke, return_type_check, JsonPath } ;
9+ use crate :: common_macros:: make_udf_function;
10+ use crate :: json_get_json:: jiter_json_get_json;
11+
12+ make_udf_function ! (
13+ JsonExtract ,
14+ json_extract,
15+ json_data path,
16+ r#"Get a value from a JSON string by its "path" in JSONPath format"#
17+ ) ;
18+
19+ #[ derive( Debug ) ]
20+ pub ( super ) struct JsonExtract {
21+ signature : Signature ,
22+ aliases : [ String ; 1 ] ,
23+ }
24+
25+ impl Default for JsonExtract {
26+ fn default ( ) -> Self {
27+ Self {
28+ signature : Signature :: exact (
29+ vec ! [ Utf8 , Utf8 ] , // JSON data and JSONPath as strings
30+ Volatility :: Immutable ,
31+ ) ,
32+ aliases : [ "json_extract" . to_string ( ) ] ,
33+ }
34+ }
35+ }
36+
37+ impl ScalarUDFImpl for JsonExtract {
38+ fn as_any ( & self ) -> & dyn Any {
39+ self
40+ }
41+
42+ fn name ( & self ) -> & str {
43+ self . aliases [ 0 ] . as_str ( )
44+ }
45+
46+ fn signature ( & self ) -> & Signature {
47+ & self . signature
48+ }
49+
50+ fn return_type ( & self , arg_types : & [ DataType ] ) -> DataFusionResult < DataType > {
51+ return_type_check ( arg_types, self . name ( ) , Utf8 )
52+ }
53+
54+ fn invoke_with_args ( & self , args : ScalarFunctionArgs ) -> DataFusionResult < ColumnarValue > {
55+ if args. args . len ( ) != 2 {
56+ return exec_err ! (
57+ "'{}' expects exactly 2 arguments (JSON data, path), got {}" ,
58+ self . name( ) ,
59+ args. args. len( )
60+ ) ;
61+ }
62+
63+ let json_arg = & args. args [ 0 ] ;
64+ let path_arg = & args. args [ 1 ] ;
65+
66+ let path_str = match path_arg {
67+ ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( s) ) ) => s,
68+ _ => return exec_err ! ( "'{}' expects a valid JSONPath string (e.g., '$.key[0]') as second argument" , self . name( ) ) ,
69+ } ;
70+
71+ let path = parse_jsonpath ( path_str) ;
72+
73+ invoke :: < StringArray > ( & [ json_arg. clone ( ) ] , |json, _| {
74+ jiter_json_get_json ( json, & path)
75+ } )
76+ }
77+
78+ fn aliases ( & self ) -> & [ String ] {
79+ & self . aliases
80+ }
81+ }
82+
83+ fn parse_jsonpath ( path : & str ) -> Vec < JsonPath < ' static > > {
84+ let segments = parse_json_path ( path)
85+ . map ( |it| it. segments )
86+ . unwrap_or ( Vec :: new ( ) ) ;
87+
88+ segments. into_iter ( ) . map ( |segment| {
89+ match segment {
90+ Segment :: Selector ( s) => match s {
91+ Selector :: Name ( name) => JsonPath :: Key ( Box :: leak ( name. into_boxed_str ( ) ) ) ,
92+ Selector :: Index ( idx) => JsonPath :: Index ( idx as usize ) ,
93+ _ => JsonPath :: None ,
94+ } ,
95+ _ => JsonPath :: None ,
96+ }
97+ } ) . collect :: < Vec < _ > > ( )
98+ }
99+
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Table-driven check that `parse_jsonpath` maps dotted names and bracketed
    /// indices onto the expected `JsonPath` steps.
    // NOTE(review): the expected key literal in the last case is reproduced
    // exactly as found (including its inner spaces) — confirm it against the
    // parser's actual output for a quoted bracket name.
    #[rstest]
    #[case("$.a.aa", vec![JsonPath::Key("a"), JsonPath::Key("aa")])]
    #[case(
        "$.a.ab[0].ac",
        vec![JsonPath::Key("a"), JsonPath::Key("ab"), JsonPath::Index(0), JsonPath::Key("ac")]
    )]
    #[case(
        "$.a.ab[1].ad",
        vec![JsonPath::Key("a"), JsonPath::Key("ab"), JsonPath::Index(1), JsonPath::Key("ad")]
    )]
    #[case(
        r#"$.a["a b"].ad"#,
        vec![JsonPath::Key("a"), JsonPath::Key("\" a b\" "), JsonPath::Key("ad")]
    )]
    #[tokio::test]
    async fn test_parse_jsonpath(#[case] path: &str, #[case] expected: Vec<JsonPath<'static>>) {
        assert_eq!(parse_jsonpath(path), expected);
    }
}
0 commit comments