|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 |
|
18 | | -from abc import ABCMeta, abstractmethod |
19 | | -from typing import List |
| 18 | +"""DataFusion Python package. |
| 19 | +
|
| 20 | +This is a Python library that binds to DataFusion, the Apache Arrow in-memory query engine. |
| 21 | +See https://datafusion.apache.org/python for more information. |
| 22 | +""" |
20 | 23 |
|
21 | 24 | try: |
22 | 25 | import importlib.metadata as importlib_metadata |
23 | 26 | except ImportError: |
24 | 27 | import importlib_metadata |
25 | 28 |
|
26 | | -import pyarrow as pa |
27 | | - |
28 | | -from ._internal import ( |
29 | | - AggregateUDF, |
30 | | - Config, |
31 | | - DataFrame, |
| 29 | +from .context import ( |
32 | 30 | SessionContext, |
33 | 31 | SessionConfig, |
34 | 32 | RuntimeConfig, |
35 | | - ScalarUDF, |
36 | 33 | SQLOptions, |
37 | 34 | ) |
38 | 35 |
|
| 36 | +# The following imports may remain opaque to the user. |
| 37 | +from ._internal import Config |
| 38 | + |
| 39 | +from .udf import ScalarUDF, AggregateUDF, Accumulator |
| 40 | + |
39 | 41 | from .common import ( |
40 | 42 | DFSchema, |
41 | 43 | ) |
42 | 44 |
|
| 45 | +from .dataframe import DataFrame |
| 46 | + |
43 | 47 | from .expr import ( |
44 | | - Alias, |
45 | | - Analyze, |
46 | 48 | Expr, |
47 | | - Filter, |
48 | | - Limit, |
49 | | - Like, |
50 | | - ILike, |
51 | | - Projection, |
52 | | - SimilarTo, |
53 | | - ScalarVariable, |
54 | | - Sort, |
55 | | - TableScan, |
56 | | - Not, |
57 | | - IsNotNull, |
58 | | - IsTrue, |
59 | | - IsFalse, |
60 | | - IsUnknown, |
61 | | - IsNotTrue, |
62 | | - IsNotFalse, |
63 | | - IsNotUnknown, |
64 | | - Negative, |
65 | | - InList, |
66 | | - Exists, |
67 | | - Subquery, |
68 | | - InSubquery, |
69 | | - ScalarSubquery, |
70 | | - GroupingSet, |
71 | | - Placeholder, |
72 | | - Case, |
73 | | - Cast, |
74 | | - TryCast, |
75 | | - Between, |
76 | | - Explain, |
77 | | - CreateMemoryTable, |
78 | | - SubqueryAlias, |
79 | | - Extension, |
80 | | - CreateView, |
81 | | - Distinct, |
82 | | - DropTable, |
83 | | - Repartition, |
84 | | - Partitioning, |
85 | | - Window, |
86 | 49 | WindowFrame, |
87 | 50 | ) |
88 | 51 |
|
89 | 52 | __version__ = importlib_metadata.version(__name__) |
90 | 53 |
|
91 | 54 | __all__ = [ |
| 55 | + "Accumulator", |
92 | 56 | "Config", |
93 | 57 | "DataFrame", |
94 | 58 | "SessionContext", |
95 | 59 | "SessionConfig", |
96 | 60 | "SQLOptions", |
97 | 61 | "RuntimeConfig", |
98 | 62 | "Expr", |
99 | | - "AggregateUDF", |
100 | 63 | "ScalarUDF", |
101 | | - "Window", |
102 | 64 | "WindowFrame", |
103 | 65 | "column", |
104 | 66 | "literal", |
105 | | - "TableScan", |
106 | | - "Projection", |
107 | 67 | "DFSchema", |
108 | | - "DFField", |
109 | | - "Analyze", |
110 | | - "Sort", |
111 | | - "Limit", |
112 | | - "Filter", |
113 | | - "Like", |
114 | | - "ILike", |
115 | | - "SimilarTo", |
116 | | - "ScalarVariable", |
117 | | - "Alias", |
118 | | - "Not", |
119 | | - "IsNotNull", |
120 | | - "IsTrue", |
121 | | - "IsFalse", |
122 | | - "IsUnknown", |
123 | | - "IsNotTrue", |
124 | | - "IsNotFalse", |
125 | | - "IsNotUnknown", |
126 | | - "Negative", |
127 | | - "ScalarFunction", |
128 | | - "BuiltinScalarFunction", |
129 | | - "InList", |
130 | | - "Exists", |
131 | | - "Subquery", |
132 | | - "InSubquery", |
133 | | - "ScalarSubquery", |
134 | | - "GroupingSet", |
135 | | - "Placeholder", |
136 | | - "Case", |
137 | | - "Cast", |
138 | | - "TryCast", |
139 | | - "Between", |
140 | | - "Explain", |
141 | | - "SubqueryAlias", |
142 | | - "Extension", |
143 | | - "CreateMemoryTable", |
144 | | - "CreateView", |
145 | | - "Distinct", |
146 | | - "DropTable", |
147 | | - "Repartition", |
148 | | - "Partitioning", |
149 | 68 | ] |
150 | 69 |
|
151 | 70 |
|
152 | | -class Accumulator(metaclass=ABCMeta): |
153 | | - @abstractmethod |
154 | | - def state(self) -> List[pa.Scalar]: |
155 | | - pass |
156 | | - |
157 | | - @abstractmethod |
158 | | - def update(self, values: pa.Array) -> None: |
159 | | - pass |
160 | | - |
161 | | - @abstractmethod |
162 | | - def merge(self, states: pa.Array) -> None: |
163 | | - pass |
164 | | - |
165 | | - @abstractmethod |
166 | | - def evaluate(self) -> pa.Scalar: |
167 | | - pass |
168 | | - |
169 | | - |
170 | | -def column(value): |
| 71 | +def column(value: str): |
| 72 | + """Create a column expression.""" |
171 | 73 | return Expr.column(value) |
172 | 74 |
|
173 | 75 |
|
174 | 76 | col = column |
175 | 77 |
|
176 | 78 |
|
177 | 79 | def literal(value): |
178 | | - if not isinstance(value, pa.Scalar): |
179 | | - value = pa.scalar(value) |
| 80 | + """Create a literal expression.""" |
180 | 81 | return Expr.literal(value) |
181 | 82 |
|
182 | 83 |
|
183 | 84 | lit = literal |
184 | 85 |
|
| 86 | +udf = ScalarUDF.udf |
185 | 87 |
|
186 | | -def udf(func, input_types, return_type, volatility, name=None): |
187 | | - """ |
188 | | - Create a new User Defined Function |
189 | | - """ |
190 | | - if not callable(func): |
191 | | - raise TypeError("`func` argument must be callable") |
192 | | - if name is None: |
193 | | - name = func.__qualname__.lower() |
194 | | - return ScalarUDF( |
195 | | - name=name, |
196 | | - func=func, |
197 | | - input_types=input_types, |
198 | | - return_type=return_type, |
199 | | - volatility=volatility, |
200 | | - ) |
201 | | - |
202 | | - |
203 | | -def udaf(accum, input_type, return_type, state_type, volatility, name=None): |
204 | | - """ |
205 | | - Create a new User Defined Aggregate Function |
206 | | - """ |
207 | | - if not issubclass(accum, Accumulator): |
208 | | - raise TypeError("`accum` must implement the abstract base class Accumulator") |
209 | | - if name is None: |
210 | | - name = accum.__qualname__.lower() |
211 | | - if isinstance(input_type, pa.lib.DataType): |
212 | | - input_type = [input_type] |
213 | | - return AggregateUDF( |
214 | | - name=name, |
215 | | - accumulator=accum, |
216 | | - input_type=input_type, |
217 | | - return_type=return_type, |
218 | | - state_type=state_type, |
219 | | - volatility=volatility, |
220 | | - ) |
| 88 | +udaf = AggregateUDF.udaf |
0 commit comments