$LmB*!j27kmqyNJt&QV#4oV
zK)3!FR8*j8sv8Z-4I|_nP*Z`CxfG+ikj6c8GL{>rbDPHSo>>E_+peoLgL)~
z@xv&Z!t~>?AaJQ`lv_#etbcECxSuNLSSVTsEV`&I57JSfIq`wUUdw=;F>!K`I?cAa
zv4~89>qkIaue{NDynw6_-Hr)yTooQ=1U}2jj*O0%?3vZIfhv@S=!Sm#d?p^}m#5-N
z&>1eTel;|^dZvd!Qyj3>dwqn0z`_pJv{O9Z+qY+mw~{{FXfJI(u5s|fN~fe6`z*6;
zHU4sTDhGVJ=lMxQ8xx94O|IKxTOZ$jOrLh}GK^-BBQ0~QdPE)bw~)e9ZyYCcofk1g
z`t=!eCVnnc;q(?2^l=Y2FGuFudT$^q>M%aicUP%5-84u?MEyxwf%pU9ziE;`JpDe1
zu~;Zo*W;eJ9++*LV9(NrLFg*TVtiNw7C5;HX7zXMf1T5TP^q3CRr
z`K1l+`Seg1-{Ud(Y1#Az-q}@ZFd&Y{lQdf
z;@ur9r!?HH?W;Llfr{GQBi{W*b$37O>$X1M#aKvApwIjTUf{kKI3p6=Gj29C5bfj8
zR=Z#Rx@z$Q^zAg|%!m^YA3;^anA8F=7H8*r|BsQno%$fTHzf}5aa9reBa9to$2(^|
zLwZizfabOr|9TfG0u|+0YY(YDq}NLvR1@Cx;-
zykj0GG3&RVn>-HAosRoj>Ks+NJP#3A^>-I}U3v7laqU-b@h1RzZqm%~lW=-XR!gdx
zz$}~m?z|Llrn5p|N9p35l}SYtof|wVU8TWup46ur=s1X!;12N(NBQyPJzqarm_BL*
zJ{Tak@_p#^7A&DyB~dWD%nQ(v2UZ@@CH(ZGAxpFTs1onXhPP=&n-S$Q{GoA7{(~^I
z`Z$&1Ac5~Ru5lD&pzX;s0Dg-R@vtb4{5tQ>rO_V7*eH
zIse*8)U!8P6@8pM6l>I;l!%cp!8~}7y5}0#RnoL!1iPh*RZ!>geEU12h?oB*xt
zCT>stHLrR}*D8n|9fbSYpVoAJ7d09-+JN)@mp-DNbkN^r*pF}|T_TldK|!M;)fedo
z<9KgvP|grky3~>G-ADdp?1-!fo0g>1le!&MEbrGNQ!=_Hj$vagy_YY}A2OF4RqQSvf`zdl2+7tH^$sB=Hx-kuB3zdB2v$J%1
zzkHhS3$-n!T`C
zxxJz~pJ9r6G4AHT5^cpxwYGf&M6{fmq&w03bu_$4Eal2nIk}Bv`hDxHa%Dt9cuUcA9Fv8War+^*
zg5IWul24AX(H);{B|Iy(1E#ERK0f)I+C22LWq>T@3A)nz{!d}wMCztfsl)yBX$S4W
z(CtBfh$BBH#^I;L@^(Q3CO2W4=2s}GMf`QBJt<(3y*?{$)7A#dkhBGyUi4L&*eQ^bHGH*43Ccn)9b#d
zS;|eK^^Z$HDwM8mO0y-RjGdO_4K?_4XA&8&E4`rPQ*&qsMSHF#
zc!q$=a5CsSMjALi-O!qQPIp0i27TUqhd0J3q5%BsqO;`M54)}@JY}4`w1YP+WogqvaEz
z_#0!MC>NT>r>+}sELAQuUN>v^E}Pb#cl&CNKR+RDV>-kRMY=)ccZZxYiXQf>SExBp
zm%>km+%$usuuR9r^Jt`hlX4@;MZk}sl7Tx;XusoxJDB!jhIRXyDvipma`ZO$j=%SZ
zI@^=i>(a=U{M@RqE5nTRhfV~dX#d8uoubGt2;QCa4!}k{K%48^_#FM+6NYhyW8~rw
zt_UyaW86k74YJU-KL?F=};_$
z>{7A7!0?O@b0sG&osj=V`+Mk{PsiLG{D2$0y5TzdFIVI!rYZZelBp*`n%WiZiFJSFlZ+*~2m
zmZPSZf{pA8qN7~%>*d$8Sp?4k0zR+cbAJS=loFN9xVlDbc+~gfZio)x_i*DmdYivx
zj_4S0L%84$2cmuE?wD)OrYs|k`K0u`!53ZCYT|O5e6Dr)?UVY-t%|jj2~*>~FNvHopG{
zWgdO4ccxx+ErLV{))I}stoH3leq+ebRkoTV8eCea#Wea^f6j$xH(qIizmYM>v*WJM
z>c)X#UGyQo3SUc_GHp59h?
ztGiBJ_DYkGby2k4l&ztFGXYao8h(e-^jevr3r|by{4XJjeT_xoM?_-ZC
z%QxmxqA$CqY-kdKZv0j9|7`1noxi;O&;p-c2=b>A?d!f&D_hKbMz=~7HcK=9qomsG
z0ox12rz)~R@@exA1x1I^YoaCQMx|x%h{sv8VEM^@_&asS%>oDdT0~=fu8%gT5+s?S
zESKB|EX&Mpq0is#(}`@K+qq_L8Ntp=QYBgo(FfI3Yuz;`%|xJ#*@wK3e4+Wd5=aq|
zZrHWfEkz^C)O~^D=L~YHYta!w_VS$0S~UIL^FETh$N8#Zx(_>ynnz*~7Uzu;
zc)!CG;A~8gV9ArwH%2kTNgMs2?}IR5v*SR8r?{HC-RFR3v$+LD?oQ%)>n#p@bbwR=
zWsmY`!SR?Hgbd{sUqQYXnupxO?Bjai`ujF^6j{T!#%@+;d+|edWKhErT&QQuup=^b
zSO7D3V!IA_xWYbEs4fdEg(Rfs#S9sHKtEPIg-vccYILX9vVWTG{!rt=%unJ|@3H?K
zxO>RdD|l}=(+GRCN!B2E#NP6>Cfey*#TU273eG>pjNLuDbm#l{ZZxj~dO4
zHffKz=b3`zLPhGKUX{&HD3KlzmR|FBT$?<IHBj#6HU1Hm`7agR+NbnxIqevFBi#2L{J&r;ew
z_K~@Ik5#9;){2}>zPo2zpYP7Z{Vf#i9YmHJ3Xz~S
zVJaE>6d^yY9XFS3o&Y_5Zf$q`zVX+`^@*VrX*c1%K?(-0X?+y+Rq2vmp#^q4MoJo3
zDIr9d|HGPeu^egYybu4(y42Yj$zS8~feS_B5Eo9FMD9zI;_+6fl_uadN&8#K$!n83
zEtaJ;N}dy;vP)K(!}`efw~;q;=T5%C)lEk~=OnUJQ!`92-@k1R6Q_#dtzG*3XsFJL
zzmb^X*{Mzcq76wSbxx_|WSvuI*|*qo#C2mGFKb7uZF+a*kyQQ_1|ZIU#m8=0Xkj8w
z|MgS-28L6eza^6?!><)4%ZlLNTLcnAcXzni+K!5;aIQiv-)YE}V)l@R$f!vZiKy9e
zo~8(Su{SRQ-c+1VGM$hea=7Mpgx+4WG$vu2^5ZM+-eJv0fRtN3?eAa#DMs=mCgtNl
zLlTn^2rCXpbtUrj$_AY#EKd=){sQ$a#cQ2X{AxbSlMGo|tt4^dOd5fpe*1;0s2vz|
zFT3)jL$aP4X}QKI`&Y`35L4Y6rWU#M(yh5Wo~oG6w&886$R%ltU}Z*uz=knTuAqrG
zSaIURiDcZ)bc4r_Oo|yH%4$VtG}pZG9&v>HN1NLfm7faqbE`@)=^|r3=Pe;>5(aPq
z)R{&$EmEi6=g;Y6AXEOg!b4evGQLxwE(dv9K}nh}ci6zPbKS!o>Z288IL-Bs6wrwn
zXp&!%5=ZxRMYZs+qLVeKRbseSH7^bR?1tlYt$N5njaRT@W;Spy1(aHUe*0HguTFT<
zTxZR-S-yDYx3Qpqw}CsO2bBsm)_@PbsBzE*Y0x5AA
z?K%P06E%U@;0@+p(96>!K&;t*s6ydu{X;T|El*5bcc)@kqpFz=mP6ms=cHz~NYg}Q
zDU#7>qh@F&Sii#11ikg#>aWcG3@#q>E!V`Dp0NzAPnsK^jSB=;#`xjBp4E~<06`40=spIqrJi}Y|{2s`Eb
zbI$Lr_Z)B`7LQSLbJ{r{uhV4hwWsT8&xn4?iu$;}d8Wj;Iv7yX$jR_R!_2r^$+crS
zGj?{T@qSHq4jU^38zF&>;{VW|e+4O0L7D~PW6N3L#@
zBZI5sx{(Dv*6AZe)wVJY7nJ@nL$yD-!XhmK^GJ=YpeDJ`PBpygfPi5W70sO6x+
z%Z$O1oslY=!g6!gmi-*$3qZpPPX6$`v#}{lTO3n*skOj?J?0VGfH!X%#7y5cr)cD4ij%K}Yi)JC
z!DiRTH%}%=ZtlNvPYccq2$e7rw!Gby1}`Gsn}4M;n@8lj9_wri7KTNCB6cEc@7Saq
zBD&F1987vMqPpc*oyXZa#hhSIc0h>u=g4ptoH+^pzyS3J_scU{D7t*>xzCC{<7(0A
zsLhmYux#71RY>}3@XYd3blujEw4X`K=kqW?1US+49{Z=sTuft3mCv@rvyOX7W>Ck6
zvOUs?IfWV#2#BR%P~Gg>&S5FKLL^(O}eW
zGOw;!n7QTEZZ%iiwYh+gF+&teTZYc8txfQ>0q`@VqN@WOo
zvnc~wwU-HV$^gdn4QC>20HxOWsJ@z8Hh{n2v%`xy?U|G(kFl>dtaE&ZU0AS+wT~Sg
zk?2zO%2mDivR^~zKZZ{IRE6vPJN4VqLgdSK*ap++SxBWfh<%80K
zTnvzMf{sE#GvJfpZG~>*;C5V_Yt~zFOQn^jstjsQ}i4`$Jogcf}@Nn>PgGOnCMKTb2g<9P%
zGe5ZxT~M4@U^FytEaVyoU9Zou14DFd^RqIZ3a?u_&73+$97WMNR^w#7M$M;Gg0<9k
z#Hi#1%sKRhY7SJr9*Km(wFse$$p)N!MQhi89hL%9Ar8lk>vJvgTLUtf>3QT6ea+aBM!m1Ey=jceWK(JEKKZ{^ZDB>Z`E;7*Gf`>Y*VqTud(Rv|1q0!y^cHJckz
z?x*`AAP?F0%hL~~!<|n@nFyKM)b`%!>!x#H=275eaYdT(?gZid>Rf*c;%xpIWCBN(
zKf4XZ7r7(z<#<;u7mx?zx+zX57=cbT`i$9B^!*_vUGhR1@eB7rRtMv3mg!oAL)6weeUp-B_P(+th40!
z7Aw|H2Z-lQwH3qy#KpD1FTN?nHEv_nR2>t!Ibq-ywg|UzeIaRs(H7tNJCcPTDwz0w
z{WfVr40IJT<2?o2Jv{32CA10sMj~Ep4$CT9hb~we2a%IzX+4M&Vj`?}wDUXPqAP}q
zu<;88C4cAC%sdnzA2m%0EoXyYSicWo1WKBu*SifmeXy{bd+hwkM~tCbRI;hMMpP90
zFy?qb>dUXE%rOehwg5l&GJzYI%B}AD#caIMnwEOfF8ZndoLB71YuFCB)}rX`ki5^(
z+44Y%x=E~xiz`k~EMdC;JB)sXt4FjyE_z+Yq=DctsEg^4?d`m#trKUfKNr2ODw2cm
r0K8TI;Ah*Lgn~^O_}c#<*+*fqEgee_bV&dE(ZJJq30A39wu<~eMRADb
literal 0
HcmV?d00001
From 0b8451d88901718b8b6dfd9576e6dd54341d32ad Mon Sep 17 00:00:00 2001
From: Elliot <36275109+Falcons-Royale@users.noreply.github.com>
Date: Fri, 6 Feb 2026 13:50:35 -0800
Subject: [PATCH 2/6] scala format fix
---
.../visualization/ternaryContour/TernaryContourOpDesc.scala | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
index 2e9bde676aa..1b5b39a293f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
@@ -65,7 +65,8 @@ class TernaryContourOpDesc extends PythonOperatorDescriptor {
override def operatorInfo: OperatorInfo =
OperatorInfo(
userFriendlyName = "Ternary Contour",
- operatorDescription = "A ternary contour plot shows how a measured value changes across all mixtures of three components that always sum to a constant (usually 100%).",
+ operatorDescription =
+ "A ternary contour plot shows how a measured value changes across all mixtures of three components that always sum to a constant (usually 100%).",
operatorGroupName = OperatorGroupConstants.VISUALIZATION_SCIENTIFIC_GROUP,
inputPorts = List(InputPort()),
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
From cfdad432c3eb1edaea46ad6f50ce626c5db0d244 Mon Sep 17 00:00:00 2001
From: carloea2
Date: Mon, 9 Feb 2026 01:54:47 -0600
Subject: [PATCH 3/6] feat(backend): introduce python code template builder for
creating Python based operators (#4189)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
### What changes were proposed in this PR?
This new PR introduces a PythonTemplateBuilder mechanism to create
Texera’s Python native operators. It refactors how Python code is
created using a new template concept, addressing prior issues with
string formatting. Previously, Python-based operators were created via
raw string formatting, which is fragile: user text can contain `{}`,
`%`, quotes, or newlines that break formatting. This PR makes codegen
deterministic and safer by treating interpolated values as data
segments.
#### Design
**Diagram 1** (compile-time `pyb` expansion and validation)
This diagram describes the Scala compile-time flow when a developer
writes a `pyb"..."` template: the `pyb` macro receives the literal parts
and argument trees, verifies that literal segments are safe, classifies
each interpolated argument (plain text vs. encodable vs. nested
builder), and applies boundary validation to ensure encodable content
cannot “break out” of its intended Python context. Each argument is
evaluated once, runtime guards are injected when a nested builder is
spliced in, and the pieces are concatenated into a
`PythonTemplateBuilder`, which compacts adjacent text chunks and renders
an `encode()` output where encodable values become decode-at-runtime
segments before the generated Python is embedded into the operator
payload.
```mermaid
sequenceDiagram
participant Dev as Scala code
participant SC as StringContext
participant M as pyb macro
participant EI as EncodableInspector
participant BV as BoundaryValidator
participant PTB as PythonTemplateBuilder
Dev->>SC: pyb"t0 $a0 t1 $a1 t2"
SC->>M: parts + arg trees
M->>M: verify literal parts
M->>EI: classify args
loop each direct encodable arg
M->>BV: validateCompileTime(left,right,prefixLine)
BV-->>M: ok / abort
end
M->>M: eval each arg once into __pyb_argN
loop each nested builder arg
M->>BV: runtimeChecksForNestedBuilder(ctx,__pyb_argN)
BV-->>M: injected guard if unsafe
end
M->>PTB: concat parts + __pyb_argN
PTB-->>Dev: returns PythonTemplateBuilder
PTB->>PTB: compact adjacent Text chunks
PTB->>PTB: render Encode (encodable -> decode(base64))
PTB-->>Dev: encode() returns python source string
Dev->>Dev: embed generated python into operator payload
```
**Diagram 2** (end-to-end runtime flow: UI → descriptor → worker
decoding with cache)
This diagram illustrates the end-to-end pipeline from UI input to
execution: the UI submits parameters (including user-controlled strings)
to the Scala descriptor, where `pyb` expansion and
`PythonTemplateBuilder` assembly produce a deterministic Python source
string in “encode mode.” The encoded Python is embedded into the
workflow plan payload, dispatched by the workflow service to the Python
worker, and executed by the operator; during execution, the operator
uses `PythonTemplateDecoder` to recover user text by decoding each
encoded segment. An LRU cache (size 256) backs the decoder so repeated
encoded strings decode once and subsequently reuse cached UTF-8 strings,
reducing overhead while preserving strict decoding semantics.
```mermaid
sequenceDiagram
autonumber
participant UI as UI Web
participant DESC as Descriptor (Scala)
participant MAC as pyb macro (compile time)
participant PTB as PythonTemplateBuilder
participant PLAN as Plan payload
participant SVC as Workflow service
participant WK as Python worker
participant OP as Python Operator
participant DEC as PythonTemplateDecoder
participant CACHE as lru_cache 256
note over DESC,PTB: PyB related (Scala compile time codegen)
UI->>DESC: submit params + code strings
DESC->>MAC: pyb interpolation expands
MAC-->>DESC: expanded builder + validation logic
DESC->>PTB: assemble chunks (Text + Value)
PTB-->>DESC: rendered python source (encode mode)
note over DESC,WK: Plan + dispatch
DESC->>PLAN: embed python source into payload
PLAN->>SVC: submit workflow plan
SVC->>WK: dispatch operator payload
note over WK,DEC: Python runtime (worker executes generated source)
WK->>OP: start operator with python source
loop each encoded segment
OP->>DEC: decode(base64)
DEC->>CACHE: lookup(base64)
alt cache hit
CACHE-->>DEC: cached str
else cache miss
CACHE-->>DEC: miss
DEC->>DEC: base64 decode + utf8 strict
DEC->>CACHE: store(base64,str)
end
DEC-->>OP: recovered user text
end
OP-->>WK: execution continued
```
**Diagram 3** (test harness: generate code, reject raw-invalid,
`py_compile`)
This diagram shows the automated verification path for Python native
operators: ScalaTest uses ClassGraph to discover every
`PythonOperatorDescriptor`, instantiates each descriptor, injects invalid
raw strings into class fields marked with `Json` properties, and calls
`generatePythonCode()` to produce the final Python source string. The
test asserts that no “RawInvalid” marker appears in the generated output
(indicating unsafe raw text did not leak), writes the source to a
temporary `source.py`, and runs `python -m py_compile` to ensure the
code is syntactically valid and compilable. Any raw-invalid leakage,
compile error, or timeout causes the test to fail, enforcing consistent
template-based code generation across operators.
```mermaid
sequenceDiagram
autonumber
participant TS as ScalaTest
participant CG as ClassGraph scanner
participant DESC as PythonOperatorDescriptor
participant GEN as generatePythonCode
participant SPEC as PythonCodeRawInvalidTextSpec
participant PY as python -m py_compile
participant FS as temp file (source.py)
TS->>CG: scan descriptors in packages
CG-->>TS: list of PythonOperatorDescriptor classes
loop each descriptor class
TS->>DESC: instantiate descriptor
TS->>GEN: call generatePythonCode(descriptor)
GEN-->>TS: python source string
TS->>SPEC: assert RawInvalid marker not present
alt marker leaked
SPEC-->>TS: FAIL (invalid raw text leaked)
else marker clean
SPEC-->>TS: OK
TS->>FS: write source to temp file
TS->>PY: py_compile(temp file)
alt compile error or timeout
PY-->>TS: FAIL (compile/timeout)
else compile ok
PY-->>TS: PASS
end
end
end
```
#### As a developer, how to use `pyb` to create your python-based
operators
1. **Use `EncodableString` for any UI/user-controlled text**
Before (raw `String`)
```scala
@JsonSchemaTitle("Ground Truth Attribute Column")
@AutofillAttributeName
var groundTruthAttribute: String = ""
@JsonSchemaTitle("Selected Features")
@AutofillAttributeNameList
var selectedFeatures: List[String] = _
```
After (`EncodableString`)
```scala
import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
@JsonSchemaTitle("Ground Truth Attribute Column")
@AutofillAttributeName
var groundTruthAttribute: EncodableString = ""
@JsonSchemaTitle("Selected Features")
@AutofillAttributeNameList
var selectedFeatures: List[EncodableString] = _
```
---
2. **Write Python using `pyb"""..."""` and interpolate values with
`$param`**
Before (string interpolation with manual quoting)
```scala
val code =
s"""
|y_train = self.dataset[\"$groundTruthAttribute\"]
|""".stripMargin
```
After (template + data: no manual quoting)
```scala
import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
val code = pyb"""
|y_train = self.dataset[$groundTruthAttribute]
|""".encode //Automatic stripMargin applied inside the builder
```
---
3. **For optional arguments, represent them as small `pyb` fragments,
then put them in the code template**
Before (manual string concatenation + quote juggling)
```scala
val colorArg = if (color.nonEmpty) s", color='$color'" else ""
val patternArg = if (pattern.nonEmpty) s", pattern_shape='$pattern'" else ""
val fig = s"fig = px.timeline(table, x_start='start', x_end='finish', y='task'$colorArg$patternArg)"
```
After (optional fragments are builders too)
```scala
val colorArg = if (color.nonEmpty) pyb", color=$color" else pyb""
val patternArg = if (pattern.nonEmpty) pyb", pattern_shape=$pattern" else pyb""
val fig = pyb"""fig = px.timeline(table, x_start=$start, x_end=$finish, y=$task$colorArg$patternArg)"""
```
---
4. **Return `.encode` from `generatePythonCode()`**
Before (returns raw string)
```scala
override def generatePythonCode(): String = {
val finalCode =
s"""
|from pytexera import *
|y_train = self.dataset[\"$groundTruthAttribute\"]
|""".stripMargin
finalCode
}
```
After (returns encoded output from builder)
```scala
override def generatePythonCode(): String = {
val finalCode = pyb"""
|from pytexera import *
|y_train = self.dataset[$groundTruthAttribute]
|"""
finalCode.encode
}
```
---
5. **Try to avoid the use of `s"..."`, `.format`, or `%` formatting for
Python codegen**
Before (`s` / `String.format` / `.format` patterns)
```scala
// s"..."
return s"""table[\"${ele.attribute}\"].values.shape[0]"""
// String.format / "{}" placeholders
workflowParam = workflowParam + String.format("%s = {},", ele.parameter.getName)
portParam = portParam + String.format("%s(table['%s'].values[i]),", ele.parameter.getType, ele.attribute)
```
After (`pyb` templates end-to-end)
```scala
return pyb"""table[${ele.attribute}].values.shape[0]"""
workflowParam = pyb"$workflowParam${ele.parameter.getName} = {},"
portParam = pyb"$portParam${ele.parameter.getType}(table[${ele.attribute}].values[i]),"
```
---
6. **Develop the unit tests in the new way**
Before (expects quoted literals like `'start'`)
```scala
assert(
opDesc.createPlotlyFigure().plain.contains(
"fig = px.timeline(table, x_start='start', x_end='finish', y='task' , color='color' )"
)
)
```
After (expects template output using variables, no embedded quotes)
```scala
assert(
opDesc.createPlotlyFigure().plain.contains(
"fig = px.timeline(table, x_start=start, x_end=finish, y=task , color=color )"
)
)
```
### Any related issues, documentation, discussions?
No
### How was this PR tested?
The PR includes a comprehensive set of tests to ensure the new
functionality works and that it doesn’t break existing workflows:
Unit Tests for PythonTemplateBuilder: New unit tests were added to
verify that PythonTemplateBuilder correctly classifies and encodes
segments. For example, tests likely feed in code strings with various
edge cases (braces, percentage signs, quotes, etc.) and assert that the
builder produces the expected spec output.
Unit Tests for PythonCodeRawInvalidTextSpec: 2 new unit tests that
instantiate each Python Native Operator, call its `generatePythonCode`
method, and check that the Python code compiles and the string format is
consistent.
## Was this PR authored or co-authored using generative AI tooling?
Reviewed by ChatGPT 5.2
---
.github/workflows/github-action-build.yml | 6 +
amber/src/main/python/core/models/operator.py | 45 +-
build.sbt | 9 +-
common/pybuilder/build.sbt | 73 ++
.../amber/pybuilder/BoundaryValidator.scala | 187 ++++
.../amber/pybuilder/EncodableInspector.scala | 162 ++++
.../pybuilder/EncodableStringAnnotation.java | 34 +
.../amber/pybuilder/PythonLexerUtils.scala | 84 ++
.../pybuilder/PythonTemplateBuilder.scala | 481 ++++++++++
.../pybuilder/PythonLexerUtilsSpec.scala | 167 ++++
.../pybuilder/PythonTemplateBuilderSpec.scala | 598 ++++++++++++
common/workflow-operator/build.sbt | 2 +
...gingFaceIrisLogisticRegressionOpDesc.scala | 23 +-
.../HuggingFaceSentimentAnalysisOpDesc.scala | 18 +-
.../HuggingFaceSpamSMSDetectionOpDesc.scala | 18 +-
.../HuggingFaceTextSummarizationOpDesc.scala | 14 +-
.../Scorer/MachineLearningScorerOpDesc.scala | 20 +-
.../base/HyperParameters.scala | 5 +-
.../base/SklearnAdvancedBaseDesc.scala | 62 +-
.../sklearn/SklearnClassifierOpDesc.scala | 16 +-
.../SklearnLinearRegressionOpDesc.scala | 12 +-
.../sklearn/SklearnPredictionOpDesc.scala | 22 +-
.../training/SklearnTrainingOpDesc.scala | 16 +-
.../operator/sort/SortCriteriaUnit.scala | 3 +-
.../amber/operator/sort/SortOpDesc.scala | 7 +-
.../reddit/RedditSearchSourceOpDesc.scala | 26 +-
.../timeSeriesPlot/TimeSeriesPlot.scala | 30 +-
.../visualization/DotPlot/DotPlotOpDesc.scala | 21 +-
.../IcicleChart/IcicleChartOpDesc.scala | 31 +-
.../ImageViz/ImageVisualizerOpDesc.scala | 19 +-
.../ScatterMatrixChartOpDesc.scala | 23 +-
.../barChart/BarChartOpDesc.scala | 33 +-
.../boxViolinPlot/BoxViolinPlotOpDesc.scala | 35 +-
.../bubbleChart/BubbleChartOpDesc.scala | 39 +-
.../bulletChart/BulletChartOpDesc.scala | 22 +-
.../BulletChartStepDefinition.scala | 5 +-
.../CandlestickChartOpDesc.scala | 26 +-
.../choroplethMap/ChoroplethMapOpDesc.scala | 31 +-
.../continuousErrorBands/BandConfig.scala | 7 +-
.../ContinuousErrorBandsOpDesc.scala | 47 +-
.../contourPlot/ContourPlotOpDesc.scala | 28 +-
.../dendrogram/DendrogramOpDesc.scala | 33 +-
.../dumbbellPlot/DumbbellDotConfig.scala | 3 +-
.../dumbbellPlot/DumbbellPlotOpDesc.scala | 49 +-
.../FigureFactoryTableConfig.scala | 3 +-
.../FigureFactoryTableOpDesc.scala | 55 +-
.../filledAreaPlot/FilledAreaPlotOpDesc.scala | 55 +-
.../funnelPlot/FunnelPlotOpDesc.scala | 40 +-
.../ganttChart/GanttChartOpDesc.scala | 41 +-
.../gaugeChart/GaugeChartOpDesc.scala | 22 +-
.../gaugeChart/GaugeChartSteps.scala | 5 +-
.../visualization/heatMap/HeatMapOpDesc.scala | 23 +-
.../hierarchychart/HierarchyChartOpDesc.scala | 31 +-
.../hierarchychart/HierarchySection.scala | 3 +-
.../histogram/HistogramChartOpDesc.scala | 45 +-
.../histogram2d/Histogram2DOpDesc.scala | 28 +-
.../lineChart/LineChartOpDesc.scala | 37 +-
.../visualization/lineChart/LineConfig.scala | 9 +-
.../nestedTable/NestedTableConfig.scala | 7 +-
.../nestedTable/NestedTableOpDesc.scala | 18 +-
.../networkGraph/NetworkGraphOpDesc.scala | 34 +-
.../pieChart/PieChartOpDesc.scala | 31 +-
.../quiverPlot/QuiverPlotOpDesc.scala | 29 +-
.../rangeSlider/RangeSliderOpDesc.scala | 39 +-
.../sankeyDiagram/SankeyDiagramOpDesc.scala | 75 +-
.../scatter3DChart/Scatter3dChartOpDesc.scala | 61 +-
.../scatterplot/ScatterplotOpDesc.scala | 51 +-
.../stripChart/StripChartOpDesc.scala | 40 +-
.../tablesChart/TablesConfig.scala | 3 +-
.../tablesChart/TablesPlotOpDesc.scala | 42 +-
.../ternaryPlot/TernaryPlotOpDesc.scala | 39 +-
.../treeplot/TreeplotOpDesc.scala | 12 +-
.../volcanoPlot/VolcanoPlotOpDesc.scala | 20 +-
.../waterfallChart/WaterfallChartOpDesc.scala | 23 +-
.../wordCloud/WordCloudOpDesc.scala | 31 +-
.../timeSeriesPlot/TimeSeriesOpDescSpec.scala | 44 +
.../DotPlot/DotPlotOpDescSpec.scala | 3 +-
.../barChart/BarChartOpDescSpec.scala | 2 +-
.../bubbleChart/BubbleChartOpDescSpec.scala | 3 +-
.../ganttChart/GanttChartOpDescSpec.scala | 15 +-
.../HierarchyChartOpDescSpec.scala | 2 -
.../amber/pybuilder/DescriptorChecker.scala | 902 ++++++++++++++++++
.../pybuilder/PythonClassgraphScanner.scala | 56 ++
.../pybuilder/PythonConsoleCapture.scala | 44 +
.../PythonRawTextReportRenderer.scala | 53 +
.../pybuilder/PythonReflectionTextUtils.scala | 64 ++
.../pybuilder/PythonReflectionUtils.scala | 65 ++
.../util/PythonCodeRawInvalidTextSpec.scala | 266 ++++++
88 files changed, 4268 insertions(+), 795 deletions(-)
create mode 100644 common/pybuilder/build.sbt
create mode 100644 common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala
create mode 100644 common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableInspector.scala
create mode 100644 common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableStringAnnotation.java
create mode 100644 common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonLexerUtils.scala
create mode 100644 common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala
create mode 100644 common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonLexerUtilsSpec.scala
create mode 100644 common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilderSpec.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesOpDescSpec.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/DescriptorChecker.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonClassgraphScanner.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonConsoleCapture.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonRawTextReportRenderer.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionTextUtils.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionUtils.scala
create mode 100644 common/workflow-operator/src/test/scala/org/apache/texera/amber/util/PythonCodeRawInvalidTextSpec.scala
diff --git a/.github/workflows/github-action-build.yml b/.github/workflows/github-action-build.yml
index 4c79fdce800..646ce4a119f 100644
--- a/.github/workflows/github-action-build.yml
+++ b/.github/workflows/github-action-build.yml
@@ -104,6 +104,12 @@ jobs:
with:
distribution: 'temurin'
java-version: 11
+ - name: Setup Python for Scala tests
+ uses: actions/setup-python@v6
+ with:
+ python-version: '3.11'
+ - name: Show Python
+ run: python --version || python3 --version
- name: Setup sbt launcher
uses: sbt/setup-sbt@3e125ece5c3e5248e18da9ed8d2cce3d335ec8dd # v1.1.14
- uses: coursier/cache-action@4e2615869d13561d626ed48655e1a39e5b192b3c # v6.4.9
diff --git a/amber/src/main/python/core/models/operator.py b/amber/src/main/python/core/models/operator.py
index 4d3288c67a5..79050839958 100644
--- a/amber/src/main/python/core/models/operator.py
+++ b/amber/src/main/python/core/models/operator.py
@@ -17,20 +17,63 @@
import overrides
import pandas
+from functools import lru_cache
from abc import ABC, abstractmethod
from collections import defaultdict
-from typing import Iterator, List, Mapping, Optional, Union, MutableMapping
+from typing import Iterator, List, Mapping, Optional, Union, MutableMapping, Protocol
from . import Table, TableLike, Tuple, TupleLike, Batch, BatchLike
from .state import State
from .table import all_output_to_tuple
+import base64
+
class Operator(ABC):
"""
Abstract base class for all operators.
"""
+ class PythonTemplateDecoder:
+ class Decoder(Protocol):
+ """Pluggable base64 decoder interface."""
+
+ def to_str(self, data: Union[str, bytes]) -> str: ...
+
+ class StdlibBase64Decoder:
+ """Default decoder using Python's stdlib base64."""
+
+ def to_str(self, data: Union[str, bytes]) -> str:
+ b64_bytes = data.encode("ascii") if isinstance(data, str) else data
+ raw = base64.b64decode(b64_bytes, validate=False)
+ return raw.decode("utf-8", errors="strict")
+
+ def __init__(
+ self,
+ decoder: Optional["Operator.PythonTemplateDecoder.Decoder"] = None,
+ cache_size: int = 256,
+ ) -> None:
+ self._decoder = decoder or self.StdlibBase64Decoder()
+ self._decode_cached = self._build_cached_decoder(cache_size)
+
+ def _build_cached_decoder(self, cache_size: int):
+ @lru_cache(maxsize=cache_size)
+ def _cached(data: Union[str, bytes]) -> str:
+ return self._decoder.to_str(data)
+
+ return _cached
+
+ def decode(self, data: Union[str, bytes]) -> str:
+ return self._decode_cached(data)
+
+ def _get_template_decoder(self) -> "Operator.PythonTemplateDecoder":
+ if not hasattr(self, "_python_template_decoder"):
+ self._python_template_decoder = self.PythonTemplateDecoder(cache_size=256)
+ return self._python_template_decoder
+
+ def decode_python_template(self, data: Union[str, bytes]) -> str:
+ return self._get_template_decoder().decode(data)
+
__internal_is_source: bool = False
@property
diff --git a/build.sbt b/build.sbt
index 027775ff253..1e506e44f60 100644
--- a/build.sbt
+++ b/build.sbt
@@ -37,8 +37,15 @@ lazy val AccessControlService = (project in file("access-control-service"))
)
.configs(Test)
.dependsOn(DAO % "test->test", Auth % "test->test")
+
+//This Scala module defines a pyb"..." macro-based DSL for composing Python code templates as an immutable PythonTemplateBuilder.
+//Used mainly for Python Native Operators
+lazy val PyBuilder = (project in file("common/pybuilder"))
+ .configs(Test)
+ .dependsOn(DAO % "test->test") // test scope dependency
+
lazy val WorkflowCore = (project in file("common/workflow-core"))
- .dependsOn(DAO, Config)
+ .dependsOn(DAO, Config, PyBuilder)
.configs(Test)
.dependsOn(DAO % "test->test") // test scope dependency
lazy val ComputingUnitManagingService = (project in file("computing-unit-managing-service"))
diff --git a/common/pybuilder/build.sbt b/common/pybuilder/build.sbt
new file mode 100644
index 00000000000..ea17dec30cd
--- /dev/null
+++ b/common/pybuilder/build.sbt
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import scala.collection.Seq
+/////////////////////////////////////////////////////////////////////////////
+// Project Settings
+/////////////////////////////////////////////////////////////////////////////
+
+name := "pybuilder"
+organization := "org.apache"
+version := "1.0.0"
+
+scalaVersion := "2.13.12"
+
+enablePlugins(JavaAppPackaging)
+
+// Enable semanticdb for Scalafix
+ThisBuild / semanticdbEnabled := true
+ThisBuild / semanticdbVersion := scalafixSemanticdb.revision
+
+// Manage dependency conflicts by always using the latest revision
+ThisBuild / conflictManager := ConflictManager.latestRevision
+
+// Restrict parallel execution of tests to avoid conflicts
+Global / concurrentRestrictions += Tags.limit(Tags.Test, 1)
+
+
+/////////////////////////////////////////////////////////////////////////////
+// Compiler Options
+/////////////////////////////////////////////////////////////////////////////
+
+// Scala compiler options
+Compile / scalacOptions ++= Seq(
+ "-Xelide-below", "WARNING", // Elide calls to @elidable methods whose level is below "WARNING"
+ "-feature", // Check feature warnings
+ "-deprecation", // Check deprecation warnings
+ "-Ywarn-unused:imports" // Check for unused imports
+)
+
+/////////////////////////////////////////////////////////////////////////////
+// Test-related Dependencies
+/////////////////////////////////////////////////////////////////////////////
+
+libraryDependencies ++= Seq(
+ "org.scalamock" %% "scalamock" % "5.2.0" % Test, // ScalaMock
+ "org.scalatest" %% "scalatest" % "3.2.15" % Test, // ScalaTest
+ "junit" % "junit" % "4.13.2" % Test, // JUnit
+ "com.novocode" % "junit-interface" % "0.11" % Test, // SBT interface for JUnit
+ "io.github.classgraph" % "classgraph" % "4.8.184" % Test,
+ "org.scala-lang" % "scala-compiler" % scalaVersion.value % Test
+
+)
+
+/////////////////////////////////////////////////////////////////////////////
+// Reflection-related Dependencies
+/////////////////////////////////////////////////////////////////////////////
+libraryDependencies ++= Seq(
+ "org.scala-lang" % "scala-reflect" % scalaVersion.value
+)
diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala
new file mode 100644
index 00000000000..8475661d733
--- /dev/null
+++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import scala.reflect.macros.blackbox
+
+/**
+ * Macro-only helper: validates boundaries for Encodable insertions.
+ *
+ * Compile-time: abort with good messages for direct Encodable args.
+ * Runtime: for nested builders (unknown content at compile time), generate a check that throws if the builder contains Encodable chunks.
+ */
+final class BoundaryValidator[C <: blackbox.Context](val c: C) {
+ import PythonLexerUtils._
+ import c.universe._
+
+ /**
+ * Centralized, templatized error messages (Option A).
+ *
+ * NOTE: This object lives inside the class so it can freely use string templates
+ * without any macro-context type gymnastics.
+ */
+ private object BoundaryErrors {
+
+ // Provide a hint that can differ between compile-time and runtime wording.
+ sealed trait RendererHint { def text: String }
+
+ case object CompileTimeHint extends RendererHint {
+ override val text: String =
+ "EncodableString renders as a Python expression (self.PythonTemplateDecoder.decode(...))"
+ }
+
+ case object RuntimeHint extends RendererHint {
+ override val text: String =
+ "EncodableString renders as a Python expression (self...decode(...))"
+ }
+
+ private def prefix(argNum1Based: Int): String =
+ s"pyb interpolator: @EncodableStringAnnotation argument #$argNum1Based "
+
+ def insideQuoted(argNum1Based: Int, hint: RendererHint): String =
+ prefix(argNum1Based) +
+ "appears inside a quoted Python string literal. " +
+ s"${hint.text}, so it must not be placed inside quotes."
+
+ def afterComment(argNum1Based: Int): String =
+ prefix(argNum1Based) +
+ "appears after a '#' comment marker on the same line."
+
+ def badLeftNeighbor(argNum1Based: Int, ch: Char): String =
+ prefix(argNum1Based) +
+ s"must not be immediately adjacent to '$ch' on the left. " +
+ "Add whitespace or punctuation to separate tokens."
+
+ def badRightNeighbor(argNum1Based: Int, ch: Char): String =
+ prefix(argNum1Based) +
+ s"must not be immediately adjacent to '$ch' on the right. " +
+ "Add whitespace or punctuation to separate tokens."
+ }
+
+ final case class CompileTimeContext(
+ leftPart: String,
+ rightPart: String,
+ prefixSource: String,
+ argIndex: Int,
+ errorPos: Position
+ )
+
+ final case class RuntimeContext(
+ leftPart: String,
+ rightPart: String,
+ prefixSource: String,
+ argIndex: Int
+ )
+
+ def validateCompileTime(ctx: CompileTimeContext): Unit = {
+ val prefixLine = lineTail(ctx.prefixSource)
+ val argNum = ctx.argIndex + 1
+
+ if (hasUnclosedQuote(prefixLine)) {
+ c.abort(
+ ctx.errorPos,
+ BoundaryErrors.insideQuoted(argNum, BoundaryErrors.CompileTimeHint)
+ )
+ }
+
+ if (hasCommentOutsideQuotes(prefixLine)) {
+ c.abort(
+ ctx.errorPos,
+ BoundaryErrors.afterComment(argNum)
+ )
+ }
+
+ if (ctx.leftPart.nonEmpty) {
+ val leftNeighbor = ctx.leftPart.charAt(ctx.leftPart.length - 1)
+ if (isBadNeighbor(leftNeighbor)) {
+ c.abort(
+ ctx.errorPos,
+ BoundaryErrors.badLeftNeighbor(argNum, leftNeighbor)
+ )
+ }
+ }
+
+ if (ctx.rightPart.nonEmpty) {
+ val rightNeighbor = ctx.rightPart.charAt(0)
+ if (isBadNeighbor(rightNeighbor)) {
+ c.abort(
+ ctx.errorPos,
+ BoundaryErrors.badRightNeighbor(argNum, rightNeighbor)
+ )
+ }
+ }
+ }
+
+ /**
+ * Generate runtime checks for nested PythonTemplateBuilder args.
+ *
+ * This is only emitted when the boundary context is unsafe. The runtime guard is:
+ * if (arg.containsEncodableString) throw ...
+ */
+  def runtimeChecksForNestedBuilder(ctx: RuntimeContext, argIdent: Tree): List[Tree] = {
+    val prefixLine = lineTail(ctx.prefixSource)
+    val argNum = ctx.argIndex + 1
+
+    val insideQuoted = hasUnclosedQuote(prefixLine)
+    val afterComment = hasCommentOutsideQuotes(prefixLine)
+
+    val leftNeighborOpt: Option[Char] =
+      if (ctx.leftPart.nonEmpty) Some(ctx.leftPart.charAt(ctx.leftPart.length - 1)) else None
+
+    val rightNeighborOpt: Option[Char] =
+      if (ctx.rightPart.nonEmpty) Some(ctx.rightPart.charAt(0)) else None
+
+    // Violation messages, collected in the same priority order used by validateCompileTime.
+    val violations = List.newBuilder[String]
+
+    if (insideQuoted) {
+      violations += BoundaryErrors.insideQuoted(argNum, BoundaryErrors.RuntimeHint)
+    }
+
+    if (afterComment) {
+      violations += BoundaryErrors.afterComment(argNum)
+    }
+
+    leftNeighborOpt.foreach { ch =>
+      if (isBadNeighbor(ch)) {
+        violations += BoundaryErrors.badLeftNeighbor(argNum, ch)
+      }
+    }
+
+    rightNeighborOpt.foreach { ch =>
+      if (isBadNeighbor(ch)) {
+        violations += BoundaryErrors.badRightNeighbor(argNum, ch)
+      }
+    }
+
+    // Emit one throw, for the highest-priority violation only: any further throw placed
+    // inside the same guard could never execute and would only add unreachable
+    // statements to the expanded code. Returns Nil when the context is safe.
+    violations.result().headOption match {
+      case None => Nil
+      case Some(msg) =>
+        List(q"""
+          if ($argIdent.containsEncodableString) {
+            throw new IllegalArgumentException(${Literal(Constant(msg))})
+          }
+        """)
+    }
+  }
+}
diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableInspector.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableInspector.scala
new file mode 100644
index 00000000000..58bdcb649ec
--- /dev/null
+++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableInspector.scala
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import scala.reflect.macros.blackbox
+
+/**
+ * Macro-only helper: inspects argument trees / types / symbols to decide if a value is Encodable-marked.
+ *
+ * NOTE: This must be context-bound because Tree/Type/Annotation are from `c.universe`.
+ */
+final class EncodableInspector[C <: blackbox.Context](val c: C) {
+
+ import c.universe._
+
+ private val stringRendererTpe: Type =
+ typeOf[
+ PythonTemplateBuilder.StringRenderer
+ ]
+
+ private val pythonTemplateBuilderTpe: Type =
+ typeOf[PythonTemplateBuilder]
+
+ // Previous/original approach: direct encodable args include values already wrapped as EncodableStringRenderer
+ private val encodableStringRendererTpe: Type =
+ typeOf[
+ PythonTemplateBuilder.EncodableStringRenderer
+ ]
+
+  // Fully-qualified name of EncodableStringAnnotation; kept as a string so it also works if the annotation is referenced indirectly.
+  private val encodableStringAnnotationFqn =
+    "org.apache.texera.amber.pybuilder.EncodableStringAnnotation"
+
+ /**
+ * If we are pointing at a getter/accessor, hop to its accessed field symbol when possible.
+ *
+ * Why: Many annotations are placed on constructor params/fields, but call sites see the accessor.
+ */
+  private def safeAccessed(sym: Symbol): Symbol =
+    sym match {
+      // MethodSymbol is a subtype of TermSymbol, so this single case covers every accessor symbol.
+      case accessor: TermSymbol if accessor.isAccessor => accessor.accessed
+      case _ => sym
+    }
+
+  /** True if the annotation's type is EncodableStringAnnotation, matched by full name or by subtyping; false if it has no type. */
+  private def annIsEncodableString(annotation: Annotation): Boolean = {
+    val annotationType = annotation.tree.tpe
+    annotationType != null && (
+      annotationType.typeSymbol.fullName == encodableStringAnnotationFqn ||
+        (annotationType <:< typeOf[EncodableStringAnnotation])
+    )
+  }
+
+ /**
+ * True if a [[Type]] carries @EncodableStringAnnotation as a TYPE_USE annotation (via [[AnnotatedType]]).
+ *
+ * Walks common wrappers (existentials, refinements, type refs) to find nested annotations.
+ */
+ private def typeHasEncodableString(typeToCheck: Type): Boolean = {
+ def loop(t: Type): Boolean = {
+ if (t == null) false
+ else {
+ val widened = t.dealias.widen
+ widened match {
+ case AnnotatedType(anns, underlying) =>
+ anns.exists(annIsEncodableString) || loop(underlying)
+ case ExistentialType(_, underlying) =>
+ loop(underlying)
+ case RefinedType(parents, _) =>
+ parents.exists(loop)
+ case TypeRef(_, _, args) =>
+ args.exists(loop)
+ case other =>
+ val sym = other.typeSymbol
+ val symHasAnn =
+ sym != null && sym != NoSymbol && sym.annotations.exists(annIsEncodableString)
+ symHasAnn || other.typeArgs.exists(loop)
+ }
+ }
+ }
+
+ loop(typeToCheck)
+ }
+
+ /**
+ * Checks @EncodableStringAnnotation on either:
+ * - accessed symbol (field/param), or
+ * - type (TYPE_USE), via [[AnnotatedType]].
+ */
+ def treeHasEncodableString(tree: Tree): Boolean = {
+ val rawSym = tree.symbol
+ val symHasAnn =
+ rawSym != null && rawSym != NoSymbol && {
+ val accessed = safeAccessed(rawSym)
+ accessed != null && accessed != NoSymbol && accessed.annotations.exists(annIsEncodableString)
+ }
+
+ symHasAnn || (tree.tpe != null && typeHasEncodableString(tree.tpe))
+ }
+
+ def isPythonTemplateBuilderArg(argExpr: c.Expr[Any]): Boolean = {
+ val tpe = argExpr.tree.tpe
+ tpe != null && (tpe.dealias.widen <:< pythonTemplateBuilderTpe)
+ }
+
+ def isStringRendererArg(argExpr: c.Expr[Any]): Boolean = {
+ val tpe = argExpr.tree.tpe
+ tpe != null && (tpe.dealias.widen <:< stringRendererTpe)
+ }
+
+ /** True if the arg is Encodable (direct argument, not a nested builder). */
+ def isDirectEncodableStringArg(argExpr: c.Expr[Any]): Boolean = {
+ if (isPythonTemplateBuilderArg(argExpr)) false
+ else {
+ val tpe = argExpr.tree.tpe
+ // Previous/original behavior:
+ // - treat already-wrapped EncodableStringRenderer as encodable
+ // - OR detect @EncodableStringAnnotation on symbol/type
+ (tpe != null && (tpe.dealias.widen <:< encodableStringRendererTpe)) ||
+ treeHasEncodableString(argExpr.tree)
+ }
+ }
+
+ /**
+ * Wrap an argument expression as a [[PythonTemplateBuilder.StringRenderer]] AST node.
+ *
+ * Priority:
+ * 1) If it's already a StringRenderer, keep it (cast).
+ * 2) Else if Encodable-marked, wrap as EncodableStringRenderer.
+ * 3) Else wrap as PyLiteralStringRenderer.
+ */
+ def wrapArg(argExpr: c.Expr[Any]): Tree = {
+ val argTree = argExpr.tree
+ val argType = argTree.tpe
+
+ if (argType != null && (argType.dealias.widen <:< stringRendererTpe)) {
+ q"$argTree.asInstanceOf[_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder.StringRenderer]"
+ } else if (treeHasEncodableString(argTree)) {
+ q"_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder.EncodableStringRenderer($argTree.toString)"
+ } else {
+ q"_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PyLiteralStringRenderer($argTree.toString)"
+ }
+ }
+}
diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableStringAnnotation.java b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableStringAnnotation.java
new file mode 100644
index 00000000000..ea17e6d0130
--- /dev/null
+++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/EncodableStringAnnotation.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@Retention(RetentionPolicy.RUNTIME)
+@Target({
+ ElementType.FIELD,
+ ElementType.PARAMETER,
+ ElementType.TYPE_USE,
+ ElementType.LOCAL_VARIABLE
+})
+public @interface EncodableStringAnnotation {}
\ No newline at end of file
diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonLexerUtils.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonLexerUtils.scala
new file mode 100644
index 00000000000..08aac3a9e8a
--- /dev/null
+++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonLexerUtils.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+/**
+ * Pure helpers used by the macro for quick, best-effort Python lexical checks.
+ *
+ * These are intentionally *not* macro-dependent, so they can be unit tested normally.
+ */
+object PythonLexerUtils {
+
+  def isIdentChar(c: Char): Boolean = c.isLetterOrDigit || c == '_'
+
+  /** True for a quote or identifier character, i.e. one that would make an adjacent Encodable-expression splice ambiguous/invalid. */
+  def isBadNeighbor(c: Char): Boolean = c == '\'' || c == '"' || isIdentChar(c)
+
+  /** Returns the substring after the last '\n' in `s` (the "current line" context), or all of `s` when it has no newline. */
+  def lineTail(s: String): String = {
+    val lastNewlineIndex = s.lastIndexOf('\n')
+    if (lastNewlineIndex >= 0) s.substring(lastNewlineIndex + 1) else s
+  }
+
+  /**
+   * Detect whether the provided line tail ends inside an unclosed single- or double-quoted string.
+   *
+   * Not a full Python parser: a small state machine tracks quote mode, and the character after any backslash is skipped.
+   */
+  def hasUnclosedQuote(lineText: String): Boolean = {
+    var inSingleQuotes = false
+    var inDoubleQuotes = false
+    var escaped = false
+
+    var i = 0
+    while (i < lineText.length) {
+      val ch = lineText.charAt(i)
+      if (escaped) escaped = false
+      else if (ch == '\\') escaped = true
+      else if (!inDoubleQuotes && ch == '\'') inSingleQuotes = !inSingleQuotes
+      else if (!inSingleQuotes && ch == '"') inDoubleQuotes = !inDoubleQuotes
+      i += 1
+    }
+    inSingleQuotes || inDoubleQuotes
+  }
+
+  /**
+   * Detect whether the provided line tail contains a `#` that is outside of any quote context.
+   *
+   * If true, a splice placed after this text would sit inside a Python comment; scanning stops at the first such `#`.
+   */
+  def hasCommentOutsideQuotes(lineText: String): Boolean = {
+    var inSingleQuotes = false
+    var inDoubleQuotes = false
+    var escaped = false
+
+    var i = 0
+    while (i < lineText.length) {
+      val ch = lineText.charAt(i)
+      if (escaped) escaped = false
+      else if (ch == '\\') escaped = true
+      else if (!inDoubleQuotes && ch == '\'') inSingleQuotes = !inSingleQuotes
+      else if (!inSingleQuotes && ch == '"') inDoubleQuotes = !inDoubleQuotes
+      else if (!inSingleQuotes && !inDoubleQuotes && ch == '#') return true
+      i += 1
+    }
+    false
+  }
+}
diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala
new file mode 100644
index 00000000000..dc9e977d329
--- /dev/null
+++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.RenderMode.{Encode, Plain}
+
+import java.nio.charset.StandardCharsets
+import java.util.Base64
+import scala.language.experimental.macros
+import scala.reflect.macros.blackbox
+
+/**
+ * Convenience type aliases for strings passed into the [[PythonTemplateBuilder]] interpolator.
+ *
+ * Design intent:
+ * - Some strings are “UI-provided” and must be rendered as a Python expression that decodes base64 at runtime.
+ * - Other strings are regular Python source fragments and should be spliced in as-is.
+ *
+ * The macro distinguishes Encodable strings via a TYPE_USE annotation (`String @EncodableStringAnnotation`).
+ */
+object PyStringTypes {
+
+ /**
+ * Treated as an Encodable string by the macro via a TYPE_USE annotation.
+ *
+ * Example:
+ * {{{
+   *   import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+   *   import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+   *
+   *   val label: EncodableString = "Hello"
+ * val code = pyb"print($label)"
+ * }}}
+ */
+ type EncodableString = String @EncodableStringAnnotation
+
+ /**
+ * Normal python string (macro defaults to [[PythonLiteral]] when no [[EncodableStringAnnotation]] is present).
+ *
+   * This alias exists mostly for readability and symmetry with [[EncodableString]].
+ */
+ type PythonLiteral = String
+
+ /**
+ * Helper “constructor” and constants for [[EncodableString]].
+ *
+ * Note: the object and members are annotated so downstream type inference tends
+ * to keep the TYPE_USE annotation attached in common scenarios.
+ */
+ @EncodableStringAnnotation
+ object EncodableStringFactory {
+
+ /** Wrap a raw Scala string as an Encodable-marked string. */
+ @EncodableStringAnnotation
+ def apply(s: String): EncodableString = s
+
+ /** Empty Encodable string (still Encodable-marked). */
+ @EncodableStringAnnotation
+ val empty: EncodableString = ""
+ }
+
+ /**
+ * Helper “constructor” and constants for [[PythonLiteral]].
+ *
+ * This does not apply any Encodable semantics. It is regular Scala `String` usage.
+ */
+ object PyLiteralFactory {
+
+ /** Identity wrapper, used as a readability hint at call sites. */
+ def apply(s: String): PythonLiteral = s
+
+ /** Empty python string. */
+ val empty: PythonLiteral = ""
+ }
+}
+
+/**
+ * =PythonTemplateBuilder: ergonomic Python codegen via `pyb"..."`=
+ *
+ * This module provides a tiny DSL for assembling Python source code from Scala while preserving two competing goals:
+ * (1) developers want to write templates that look like normal Python, and (2) user-provided text must not be injected
+ * into the emitted Python as raw literals that can break syntax or create ambiguous token boundaries.
+ *
+ * The core idea is that every value spliced into a `pyb"..."` template is first classified into one of two buckets:
+ *
+ * - '''Python literals''' (ordinary Scala strings or already-safe fragments) are inserted as-is.
+ * - '''Encodable strings''' (typically UI-provided text) are base64-encoded at build time and rendered as a *Python
+ * expression* that decodes at runtime, rather than being embedded as a Python string literal.
+ *
+ * This classification is driven by a TYPE_USE annotation: `String @EncodableStringAnnotation`. The annotation is defined
+ * with a runtime retention and is allowed on fields, parameters, local variables, and type uses, so it survives many
+ * common Scala typing patterns (e.g., inferred vals, constructor params, or aliases). Users normally do not construct the
+ * annotation directly; instead, they use helper type aliases/factories in `PyStringTypes` for readability.
+ *
+ * ==Render modes==
+ *
+ * A `PythonTemplateBuilder` can be rendered in two modes:
+ *
+ * - `plain`: emit everything as raw text (useful for debugging or when you know all content is safe).
+ * - `encode`: emit encodable chunks as Python decode expressions (the default `toString` behavior).
+ *
+ * Internally this is represented as a small sealed trait enum (`RenderMode.Plain` / `RenderMode.Encode`) rather than an
+ * integer flag, to keep call sites self-documenting and avoid “magic numbers”.
+ *
+ * ==Chunk model (immutable, composable)==
+ *
+ * A builder is an immutable list of chunks:
+ *
+ * - `Text(value)` for literal template parts
+ * - `Value(renderer)` for interpolated arguments that know how to render in each mode
+ *
+ * Two concrete renderers are provided:
+ *
+ *   - `EncodableStringRenderer`: pre-encodes `stringValue` as base64 (UTF-8) once, and in `Encode` mode produces a Python
+ *     expression like `self.decode_python_template('<base64>')` given by [[wrapWithPythonDecoderExpr]].
+ * - `PyLiteralStringRenderer`: always emits the raw string value unchanged.
+ *
+ * Builders can be concatenated with `+` (builder + builder), which merges adjacent `Text` chunks for compactness.
+ * Direct concatenation with a plain `String` is intentionally unsupported to prevent bypassing the macro’s safety checks.
+ *
+ * ==How the `pyb"..."` macro works==
+ *
+ * The `pyb` interpolator is implemented as a Scala macro. At compile time it receives:
+ *
+ * - the literal parts from the `StringContext` (the “gaps” around `$args`)
+ * - the argument trees corresponding to each `$arg`
+ *
+ * The macro’s pipeline is:
+ *
+ * 1. '''Extract literal parts''' from the `StringContext` AST and ensure they are *string literals*. If any part is not
+ * a literal, compilation aborts. This prevents “template text” from being computed dynamically where correctness and
+ * boundary analysis would become unreliable.
+ *
+ * 2. '''Classify direct encodable arguments''' using `EncodableInspector`:
+ * it inspects both the argument symbol and the argument type to determine whether the encodable annotation is present.
+ * This includes a small “accessor hop” so that annotations placed on fields/constructor params are still visible when
+ * call sites reference getters.
+ *
+ * 3. '''Compile-time boundary validation for direct encodables''':
+ * if an argument is directly encodable (and not a nested builder), `BoundaryValidator.validateCompileTime` is run on
+ * its surrounding literal context. The validator performs quick lexical checks on the current line:
+ *
+ * - the splice must not occur inside an unclosed single/double-quoted string
+ * - the splice must not occur after a `#` comment marker
+ * - the splice must not be immediately adjacent to identifier characters or quote characters on either side
+ *
+ * These restrictions exist because an Encodable string renders as a Python *expression*, not a Python string literal.
+ * Putting an expression inside quotes, inside a comment, or glued to an identifier would either be invalid Python or
+ * silently change tokenization in surprising ways.
+ *
+ * 4. '''Lower each argument into a builder''':
+ * every `$arg` becomes a `PythonTemplateBuilder`.
+ *
+ * - If the argument is already a `PythonTemplateBuilder`, it is used directly.
+ * - Otherwise, it is wrapped into a `StringRenderer` (`EncodableStringRenderer` or `PyLiteralStringRenderer`) and
+ * turned into a minimal builder containing a single `Value(...)` chunk.
+ *
+ * Each argument is evaluated once and stored in a fresh local `val __pyb_argN` so that expensive expressions or
+ * side-effects are not duplicated by expansion.
+ *
+ * 5. '''Runtime safety for nested builders''':
+ * for arguments that are themselves `PythonTemplateBuilder`s, the macro cannot always know at compile time whether they
+ * contain Encodable chunks (they may be computed, returned, or composed elsewhere). For these nested builders, the macro
+ * conditionally emits runtime guards *only when the surrounding context is unsafe* (inside quotes, after comments, or
+ * adjacent to “bad neighbor” characters). The guard pattern is:
+ *
+ * {{{
+ * if (__pyb_argN.containsEncodableString) throw new IllegalArgumentException("...")
+ * }}}
+ *
+ * This preserves the ergonomics of composing builders while keeping the same safety contract as direct splices.
+ *
+ * 6. '''Assemble the final builder''':
+ * the macro concatenates `text0 + arg0 + text1 + arg1 + ... + textN` into one `PythonTemplateBuilder`.
+ *
+ * ==Lexical checks (best-effort, intentionally small)==
+ *
+ * The boundary rules rely on `PythonLexerUtils`, a tiny state machine that scans only the “current line tail” to decide
+ * whether quotes are unbalanced and whether a `#` begins a comment outside quotes. This is not a full Python parser.
+ * It is deliberately lightweight so the macro stays fast and so the helpers can be unit-tested independently.
+ *
+ * ==Extensibility notes==
+ *
+ * The design keeps all rendering behavior behind `StringRenderer`, and keeps boundary policy in `BoundaryValidator`.
+ * If new encoding schemes, alternate runtime decode helpers, or additional safety rules are needed, they can be introduced
+ * without rewriting the template-building API. In particular, swapping `wrapWithPythonDecoderExpr` or adding new renderers
+ * is a contained change: the macro only needs to decide *which renderer* to use, not *how it renders*.
+ */
+object PythonTemplateBuilder {
+
+ // ===== render mode enum (no Ints) =====
+ def wrapWithPythonDecoderExpr(text: String): String =
+ s"self.decode_python_template('$text')"
+
+ sealed trait RenderMode extends Product with Serializable
+ object RenderMode {
+ case object Plain extends RenderMode
+ case object Encode extends RenderMode
+ }
+
+ // ===== wrappers =====
+
+ /**
+ * Base abstraction for values that can be spliced into a [[PythonTemplateBuilder]].
+ *
+ * A [[StringRenderer]] knows how to render itself depending on `mode`.
+ */
+ sealed trait StringRenderer extends Product with Serializable {
+ def stringValue: String
+ def render(mode: RenderMode): String
+ }
+
+ /**
+ * Encodable string: encoded-mode wraps with [[wrapWithPythonDecoderExpr]],
+ * plain-mode is raw `stringValue`.
+ */
+ final case class EncodableStringRenderer(stringValue: String) extends StringRenderer {
+ private val encodedB64: String =
+ Base64.getEncoder.encodeToString(stringValue.getBytes(StandardCharsets.UTF_8))
+
+ override def render(mode: RenderMode): String =
+ if (mode == Encode) wrapWithPythonDecoderExpr(encodedB64) else stringValue
+ }
+
+ /**
+ * Python literal string: always raw `stringValue` regardless of mode.
+ */
+ final case class PyLiteralStringRenderer(stringValue: String) extends StringRenderer {
+ override def render(mode: RenderMode): String = stringValue
+ }
+
+ // ===== internal chunk model =====
+
+ private[pybuilder] sealed trait Chunk extends Product with Serializable
+ private[pybuilder] final case class Text(value: String) extends Chunk
+ private[pybuilder] final case class Value(value: StringRenderer) extends Chunk
+
+ /**
+ * Build a [[PythonTemplateBuilder]] from literal parts and already-wrapped args.
+ *
+ * @param literalParts raw StringContext parts (length = args + 1)
+ * @param pyArgs args wrapped as [[StringRenderer]]
+ */
+  private[amber] def fromInterpolated(literalParts: List[String], pyArgs: List[StringRenderer]): PythonTemplateBuilder = {
+    require(
+      literalParts.length == pyArgs.length + 1,
+      s"pyb interpolator mismatch: parts=${literalParts.length}, args=${pyArgs.length}"
+    )
+
+    // Interleave as part0, arg0, part1, ..., partN. Both lists are walked in lockstep
+    // with zip instead of being indexed, since positional access on a List is O(i).
+    val chunkBuilder = List.newBuilder[Chunk]
+    chunkBuilder += Text(literalParts.head)
+    pyArgs.zip(literalParts.tail).foreach {
+      case (renderer, followingPart) =>
+        chunkBuilder += Value(renderer)
+        chunkBuilder += Text(followingPart)
+    }
+
+    new PythonTemplateBuilder(compact(chunkBuilder.result()))
+  }
+
+ /** Merge adjacent text chunks. */
+ private def compact(chunksToCompact: List[Chunk]): List[Chunk] =
+ chunksToCompact.foldRight(List.empty[Chunk]) {
+ case (Text(leftText), Text(rightText) :: remaining) =>
+ Text(leftText + rightText) :: remaining
+ case (chunk, compactedTail) =>
+ chunk :: compactedTail
+ }
+
+ /** Concatenate chunk lists, merging boundary text chunks when possible. */
+ private def concatChunks(leftChunks: List[Chunk], rightChunks: List[Chunk]): List[Chunk] =
+ (leftChunks, rightChunks) match {
+ case (Nil, _) => rightChunks
+ case (_, Nil) => leftChunks
+ case _ =>
+ (leftChunks.last, rightChunks.head) match {
+ case (Text(leftText), Text(rightText)) =>
+ compact(leftChunks.dropRight(1) ::: Text(leftText + rightText) :: rightChunks.tail)
+ case _ =>
+ leftChunks ::: rightChunks
+ }
+ }
+
+ // ===== custom interpolator =====
+
+ /** Adds the `pyb"..."` string interpolator. */
+ implicit final class PythonTemplateBuilderStringContext(private val stringContext: StringContext) extends AnyVal {
+ def pyb(argValues: Any*): PythonTemplateBuilder = macro Macros.pybImpl
+ }
+
+ object Macros { // Compile-time implementation referenced by the interpolator's `macro Macros.pybImpl`.
+
+ /** Macro entry point for `pyb"..."`: checks that the StringContext parts are string literals, runs boundary checks around encodable / nested-builder args, and emits a block that binds each arg once and concatenates text0 + arg0 + ... + textN. */
+ def pybImpl(macroCtx: blackbox.Context)(
+ argValues: macroCtx.Expr[Any]*
+ ): macroCtx.Expr[PythonTemplateBuilder] = {
+ import macroCtx.universe._
+
+ // Stable, `_root_`-anchored references built as Trees/TypeTrees (NOT Strings), spliced into the quasiquotes below.
+ val PTBTerm: Tree =
+ q"_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder"
+ val PTBType: Tree =
+ tq"_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder"
+ val StringRendererTpt: Tree =
+ tq"_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder.StringRenderer"
+
+ val inspector = new EncodableInspector[macroCtx.type](macroCtx) // helper classes defined outside this block, bound to this macro context
+ val validator = new BoundaryValidator[macroCtx.type](macroCtx)
+
+ // --- extract literal parts from StringContext: the prefix must be an application whose single argument is itself an application carrying the parts ---
+ val literalPartTrees: List[Tree] = macroCtx.prefix.tree match {
+ case Apply(_, List(Apply(_, rawPartTrees))) => rawPartTrees
+ case prefixTree =>
+ macroCtx.abort(
+ macroCtx.enclosingPosition,
+ s"pyb interpolator: cannot extract StringContext parts from: ${showRaw(prefixTree)}"
+ )
+ }
+
+ // Ensure every part is a String literal; anything else aborts the expansion at the enclosing position.
+ literalPartTrees.foreach {
+ case Literal(Constant(_: String)) => // ok
+ case nonLiteral =>
+ macroCtx.abort(
+ macroCtx.enclosingPosition,
+ s"pyb interpolator requires literal parts; got: ${showRaw(nonLiteral)}"
+ )
+ }
+
+ val literalPartStrings: List[String] =
+ literalPartTrees.map { case Literal(Constant(s: String)) => s } // partial match is safe here: non-literal parts already aborted above
+
+ // --- compile-time boundary checks for *direct* Encodable args: arg i sits between parts i and i+1; prefixSource is all literal text before it ---
+ argValues.toList.zipWithIndex.foreach {
+ case (argExpr, argIndex) if inspector.isDirectEncodableStringArg(argExpr) =>
+ val leftPart = literalPartStrings(argIndex)
+ val rightPart = literalPartStrings(argIndex + 1)
+ val prefixSource = literalPartStrings.take(argIndex + 1).mkString("")
+ val errorPos =
+ if (argExpr.tree.pos != NoPosition) argExpr.tree.pos else macroCtx.enclosingPosition // fall back when the arg tree carries no position
+
+ validator.validateCompileTime(
+ validator.CompileTimeContext(leftPart, rightPart, prefixSource, argIndex, errorPos)
+ )
+
+ case _ => // no-op
+ }
+
+ // --- builders for literal parts and args: each piece becomes its own `fromInterpolated(parts, renderArgs)` call ---
+ val emptyRenderArgs =
+ q"_root_.scala.List.empty[$StringRendererTpt]"
+
+ def textBuilder(partTree: Tree): Tree =
+ q"$PTBTerm.fromInterpolated(_root_.scala.List($partTree), $emptyRenderArgs)"
+
+ val emptyStrLit: Tree = Literal(Constant("")) // a lone value is passed as parts ("", "") around one render arg
+
+ def valueBuilder(argExpr: macroCtx.Expr[Any]): Tree = {
+ val wrapped = inspector.wrapArg(argExpr)
+ q"$PTBTerm.fromInterpolated(_root_.scala.List($emptyStrLit, $emptyStrLit), _root_.scala.List($wrapped))"
+ }
+
+ val pythonTemplateBuilderTpe =
+ typeOf[_root_.org.apache.texera.amber.pybuilder.PythonTemplateBuilder]
+
+ def argAsBuilder(argExpr: macroCtx.Expr[Any]): Tree = {
+ val argTree = argExpr.tree
+ val argType = argTree.tpe
+ if (argType != null && (argType.dealias.widen <:< pythonTemplateBuilderTpe)) { // arg is statically a builder: reuse it; otherwise wrap the value
+ q"$argTree.asInstanceOf[$PTBType]"
+ } else {
+ valueBuilder(argExpr)
+ }
+ }
+
+ // Evaluate each arg once: bind it to a local val named __pyb_arg<i>, which the later trees reference by name.
+ val evaluatedArgBuilders: List[Tree] =
+ argValues.toList.zipWithIndex.map {
+ case (argExpr, i) =>
+ val argValName = TermName(s"__pyb_arg$i")
+ q"val $argValName: $PTBType = ${argAsBuilder(argExpr)}"
+ }
+
+ // Runtime boundary checks for nested PythonTemplateBuilders that *may* contain Encodable chunks; they refer to the __pyb_arg<i> vals bound above.
+ val nestedBuilderBoundaryChecks: List[Tree] =
+ argValues.toList.zipWithIndex.flatMap {
+ case (argExpr, argIndex) if inspector.isPythonTemplateBuilderArg(argExpr) =>
+ val leftPart = literalPartStrings(argIndex)
+ val rightPart = literalPartStrings(argIndex + 1)
+ val prefixSource = literalPartStrings.take(argIndex + 1).mkString("")
+
+ val argIdent = Ident(TermName(s"__pyb_arg$argIndex"))
+ validator.runtimeChecksForNestedBuilder(
+ validator.RuntimeContext(leftPart, rightPart, prefixSource, argIndex),
+ argIdent
+ )
+
+ case _ => Nil
+ }
+
+ // Concatenate: text0 + arg0 + text1 + arg1 + ... + textN (a left fold that emits `+` calls between builders).
+ val renderTree: Tree = {
+ val baseTree = textBuilder(literalPartTrees.head)
+ argValues.toList.zipWithIndex.foldLeft(baseTree) {
+ case (acc, (_, i)) =>
+ val argIdent = Ident(TermName(s"__pyb_arg$i"))
+ val nextText = textBuilder(literalPartTrees(i + 1))
+ q"$acc + $argIdent + $nextText"
+ }
+ }
+
+ val finalExpr: Tree =
+ q"""
+ {
+ ..$evaluatedArgBuilders
+ ..$nestedBuilderBoundaryChecks
+ $renderTree
+ }
+ """
+
+ macroCtx.Expr[PythonTemplateBuilder](finalExpr)
+ }
+ }
+}
+
+/**
+ * Immutable holder of Python source assembled through `pyb"..."`; it renders either as plain text or in encoded form.
+ */
+final class PythonTemplateBuilder private[pybuilder] (private val chunks: List[PythonTemplateBuilder.Chunk])
+    extends Serializable {
+  import PythonTemplateBuilder._
+
+  /** Appends another builder and returns the combined builder; neither operand is modified. */
+  def +(that: PythonTemplateBuilder): PythonTemplateBuilder = {
+    val joined = concatChunks(chunks, that.chunks)
+    new PythonTemplateBuilder(joined)
+  }
+
+  /** Always throws: a raw String cannot be appended to a builder. */
+  def +(that: String): PythonTemplateBuilder =
+    throw new UnsupportedOperationException(s"Direct String concatenation is not supported $that")
+
+  def plain: String = render(Plain)
+  def encode: String = render(Encode)
+  override def toString: String = encode
+
+  /** True when at least one chunk is a value backed by an EncodableStringRenderer. */
+  def containsEncodableString: Boolean =
+    chunks.exists {
+      case Value(renderer) => renderer.isInstanceOf[EncodableStringRenderer]
+      case _               => false
+    }
+
+  /** Renders every chunk in the given mode, then strips margins and rewrites CRLF / CR line endings to LF. */
+  private def render(renderMode: RenderMode): String = {
+    val pieces = chunks.map {
+      case Text(literal)   => literal
+      case Value(renderer) => renderer.render(renderMode)
+    }
+    pieces.mkString.stripMargin.replace("\r\n", "\n").replace("\r", "\n")
+  }
+}
diff --git a/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonLexerUtilsSpec.scala b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonLexerUtilsSpec.scala
new file mode 100644
index 00000000000..ea473969e7e
--- /dev/null
+++ b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonLexerUtilsSpec.scala
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.scalatest.funsuite.AnyFunSuite
+
+class PythonLexerUtilsSpec extends AnyFunSuite { // Unit tests pinning the observable behavior of the PythonLexerUtils helpers.
+
+ // -------- isIdentChar: letters, digits and underscore are accepted; '-', ' ' and '#' are rejected --------
+
+ test("isIdentChar: lowercase letter is identifier char") {
+ assert(PythonLexerUtils.isIdentChar('a'))
+ }
+
+ test("isIdentChar: uppercase letter is identifier char") {
+ assert(PythonLexerUtils.isIdentChar('Z'))
+ }
+
+ test("isIdentChar: digit is identifier char") {
+ assert(PythonLexerUtils.isIdentChar('5'))
+ }
+
+ test("isIdentChar: underscore is identifier char") {
+ assert(PythonLexerUtils.isIdentChar('_'))
+ }
+
+ test("isIdentChar: dash is not identifier char") {
+ assert(!PythonLexerUtils.isIdentChar('-'))
+ }
+
+ test("isIdentChar: space is not identifier char") {
+ assert(!PythonLexerUtils.isIdentChar(' '))
+ }
+
+ test("isIdentChar: hash is not identifier char") {
+ assert(!PythonLexerUtils.isIdentChar('#'))
+ }
+
+ // -------- isBadNeighbor: both quote characters and identifier characters are "bad"; space and comma are not --------
+
+ test("isBadNeighbor: single quote is bad neighbor") {
+ assert(PythonLexerUtils.isBadNeighbor('\''))
+ }
+
+ test("isBadNeighbor: double quote is bad neighbor") {
+ assert(PythonLexerUtils.isBadNeighbor('"'))
+ }
+
+ test("isBadNeighbor: identifier chars are bad neighbors") {
+ assert(PythonLexerUtils.isBadNeighbor('a'))
+ assert(PythonLexerUtils.isBadNeighbor('Z'))
+ assert(PythonLexerUtils.isBadNeighbor('0'))
+ assert(PythonLexerUtils.isBadNeighbor('_'))
+ }
+
+ test("isBadNeighbor: whitespace is not bad neighbor") {
+ assert(!PythonLexerUtils.isBadNeighbor(' '))
+ }
+
+ test("isBadNeighbor: punctuation like comma is not bad neighbor") {
+ assert(!PythonLexerUtils.isBadNeighbor(','))
+ }
+
+ // -------- lineTail: the text after the last '\n' (the whole string when there is none, empty when it ends with '\n') --------
+
+ test("lineTail: string without newline returns full string") {
+ val text = "no-newline"
+ assert(PythonLexerUtils.lineTail(text) == text)
+ }
+
+ test("lineTail: returns text after single newline") {
+ val text = "first\nsecond"
+ assert(PythonLexerUtils.lineTail(text) == "second")
+ }
+
+ test("lineTail: returns text after last newline") {
+ val text = "a\nb\nc\nlast-line"
+ assert(PythonLexerUtils.lineTail(text) == "last-line")
+ }
+
+ test("lineTail: works with trailing newline (returns empty)") {
+ val text = "first\nsecond\n"
+ assert(PythonLexerUtils.lineTail(text) == "")
+ }
+
+ // -------- hasUnclosedQuote: true only when a single or double quote is left open; backslash-escaped quotes do not close a string --------
+
+ test("hasUnclosedQuote: empty string has no unclosed quote") {
+ assert(!PythonLexerUtils.hasUnclosedQuote(""))
+ }
+
+ test("hasUnclosedQuote: balanced single quotes returns false") {
+ assert(!PythonLexerUtils.hasUnclosedQuote("'a'"))
+ }
+
+ test("hasUnclosedQuote: balanced double quotes returns false") {
+ assert(!PythonLexerUtils.hasUnclosedQuote("\"a\""))
+ }
+
+ test("hasUnclosedQuote: unclosed single quote returns true") {
+ assert(PythonLexerUtils.hasUnclosedQuote("'unclosed"))
+ }
+
+ test("hasUnclosedQuote: unclosed double quote returns true") {
+ assert(PythonLexerUtils.hasUnclosedQuote("\"unclosed"))
+ }
+
+ test("hasUnclosedQuote: escaped single quote inside single quotes does not break balance") {
+ val text = "'it\\'s ok'" // value seen by the lexer: 'it\'s ok'
+ assert(!PythonLexerUtils.hasUnclosedQuote(text))
+ }
+
+ test("hasUnclosedQuote: escaped double quote inside double quotes does not break balance") {
+ val text = "\"he said \\\"hi\\\"\"" // value seen by the lexer: "he said \"hi\""
+ assert(!PythonLexerUtils.hasUnclosedQuote(text))
+ }
+
+ test("hasUnclosedQuote: mixed quotes with proper closing returns false") {
+ val text = "'a' + \"b\""
+ assert(!PythonLexerUtils.hasUnclosedQuote(text))
+ }
+
+ // -------- hasCommentOutsideQuotes: a '#' counts as a comment marker only when it is not inside a quoted string --------
+
+ test("hasCommentOutsideQuotes: no hash means no comment") {
+ assert(!PythonLexerUtils.hasCommentOutsideQuotes("print(1)"))
+ }
+
+ test("hasCommentOutsideQuotes: hash outside quotes is a comment") {
+ assert(PythonLexerUtils.hasCommentOutsideQuotes("x = 1 # comment"))
+ }
+
+ test("hasCommentOutsideQuotes: hash inside single quotes is not a comment") {
+ assert(!PythonLexerUtils.hasCommentOutsideQuotes("print('# not comment')"))
+ }
+
+ test("hasCommentOutsideQuotes: hash inside double quotes is not a comment") {
+ assert(!PythonLexerUtils.hasCommentOutsideQuotes("print(\"# not comment\")"))
+ }
+
+ test("hasCommentOutsideQuotes: escaped quotes preserve quote state correctly") {
+ val line = "print(\"\\\"# still in string\\\"\") # comment here" // the first '#' sits inside the double-quoted string; the second one is outside
+ assert(PythonLexerUtils.hasCommentOutsideQuotes(line))
+ }
+
+ test("hasCommentOutsideQuotes: multiple hashes only first outside quotes matters") {
+ val line = "print('# in string') # real comment # more"
+ assert(PythonLexerUtils.hasCommentOutsideQuotes(line))
+ }
+}
diff --git a/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilderSpec.scala b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilderSpec.scala
new file mode 100644
index 00000000000..d2e423f810a
--- /dev/null
+++ b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilderSpec.scala
@@ -0,0 +1,598 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.apache.texera.amber.pybuilder.PyStringTypes.{EncodableString, PythonLiteral}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.{EncodableStringRenderer, PyLiteralStringRenderer, PythonTemplateBuilderStringContext}
+import org.scalatest.funsuite.AnyFunSuite
+
+import java.nio.charset.StandardCharsets
+import java.util.Base64
+import scala.annotation.meta.field
+import scala.reflect.runtime.currentMirror
+import scala.tools.reflect.ToolBox
+
+class PythonTemplateBuilderSpec extends AnyFunSuite { // Behavior tests for the pyb interpolator and PythonTemplateBuilder. NOTE(review): some test names use identifiers not seen elsewhere in this spec (StringPyMk, @StringUI, containsUi) — presumably left over from a rename; confirm.
+
+ // ---------- Helpers: expected values are the Base64 of the UTF-8 bytes, wrapped by wrapWithPythonDecoderExpr ----------
+ private def base64Of(text: String): String =
+ Base64.getEncoder.encodeToString(text.getBytes(StandardCharsets.UTF_8))
+
+ private def decodeExpr(text: String): String =
+ PythonTemplateBuilder.wrapWithPythonDecoderExpr(base64Of(text))
+ // Toolbox helpers: compile generated source while the test runs, so compilation failures can be asserted without checking error strings.
+ private lazy val tb: ToolBox[scala.reflect.runtime.universe.type] = currentMirror.mkToolBox()
+
+ private def inPybuilderPkg(code: String): String =
+ s"""package org.apache.texera.amber.pybuilder {
+ |
+ |$code
+ |
+ |}""".stripMargin
+
+ private def assertToolboxDoesNotCompile(code: String): Unit = {
+ intercept[Throwable] {
+ // compile only (do not run); macro expansion happens during compilation. NOTE(review): any Throwable satisfies this, including parse errors or bad imports unrelated to the pyb checks.
+ tb.compile(tb.parse(inPybuilderPkg(code)))
+ }
+ ()
+ }
+ // Unicode escapes in *generated* Scala source: must be written as "\\uXXXX" in this test file. NOTE(review): the f-string below has four backslashes, which appears to yield two in the generated source — confirm the escape still decodes to the target character.
+ private def scalaUnicodeEscape(ch: Char): String =
+ f"\\\\u${ch.toInt}%04X"
+
+ // ========================================================================
+ // Rendering basics (plain vs encoded): plain keeps raw text, encode swaps encodable values for a decoder expression
+ // ========================================================================
+
+ test("plain renders empty text") {
+ val builder = pyb""
+ assert(builder.plain == "")
+ }
+
+ test("plain renders literal text") {
+ val builder = pyb"hello"
+ assert(builder.plain == "hello")
+ }
+
+ test("encoded renders literal text (no UI args) same as plain") {
+ val builder = pyb"hello"
+ assert(builder.encode == "hello")
+ }
+
+ test("toString defaults to encoded") {
+ val builder = pyb"hello"
+ assert(builder.toString == builder.encode)
+ }
+
+ test("StringPyMk renders raw in both modes") {
+ val pyFragment = PyLiteralStringRenderer("print('x')")
+ assert(pyFragment.render(PythonTemplateBuilder.RenderMode.Plain) == "print('x')")
+ assert(pyFragment.render(PythonTemplateBuilder.RenderMode.Encode) == "print('x')")
+ }
+
+ test("EncodableString renders raw in plain mode") {
+ val uiText = EncodableStringRenderer("abc")
+ assert(uiText.render(PythonTemplateBuilder.RenderMode.Plain) == "abc")
+ }
+
+ test("EncodableString renders B64.decode('') in encoded mode") {
+ val rawText = "abc"
+ val uiText = EncodableStringRenderer(rawText)
+ assert(uiText.render(PythonTemplateBuilder.RenderMode.Encode) == decodeExpr(rawText))
+ }
+
+ test("EncodableString base64 uses UTF-8 and handles unicode") {
+ val rawText = "你好 👋"
+ val uiText = EncodableStringRenderer(rawText)
+ assert(uiText.render(PythonTemplateBuilder.RenderMode.Encode) == decodeExpr(rawText))
+ assert(uiText.render(PythonTemplateBuilder.RenderMode.Plain) == rawText)
+ }
+
+ test("pyb interpolator defaults to StringPyMk for normal values (toString)") {
+ val value = 42
+ val builder = pyb"val=$value"
+ assert(builder.plain == "val=42")
+ assert(builder.encode == "val=42")
+ }
+
+ test("pyb supports multiple args") {
+ val firstValue = 1
+ val secondValue = "two"
+ val thirdValue = 3.0
+ val builder = pyb"a=$firstValue b=$secondValue c=$thirdValue"
+ assert(builder.plain == "a=1 b=two c=3.0")
+ }
+
+ test("passing a PyString (EncodableString) is preserved (no re-wrapping)") {
+ val rawText = "ui"
+ val uiPyString: PythonTemplateBuilder.StringRenderer = EncodableStringRenderer(rawText)
+ val builder = pyb"$uiPyString"
+ assert(builder.plain == rawText)
+ assert(builder.encode == decodeExpr(rawText))
+ }
+
+ test("passing a PyString (StringPyMk) is preserved") {
+ val rawPy: PythonTemplateBuilder.StringRenderer = PyLiteralStringRenderer("x + 1")
+ val builder = pyb"$rawPy"
+ assert(builder.plain == "x + 1")
+ assert(builder.encode == "x + 1")
+ }
+
+ // ========================================================================
+ // Whitespace / multiline / normalization: margins are stripped when the builder is rendered
+ // ========================================================================
+
+ test("stripMargin is applied on render() output") {
+ val builder =
+ pyb"""|line1
+ |line2"""
+ assert(builder.plain == "line1\nline2")
+ }
+
+ test("stripMargin works with interpolation too") {
+ val value = 7
+ val builder =
+ pyb"""|line1 $value
+ |line2"""
+ assert(builder.plain == "line1 7\nline2")
+ }
+
+ // ========================================================================
+ // Concatenation: the `+` operator between builders
+ // ========================================================================
+
+ test("operator + concatenates builders") {
+ val left = pyb"hello "
+ val right = pyb"world"
+ assert((left + right).plain == "hello world")
+ }
+
+ test("operator + preserves encoded behavior when mixing UI and raw") {
+ val uiText = EncodableStringRenderer("X")
+ val prefix = pyb"pre:"
+ val middle = pyb"$uiText"
+ val suffix = pyb":post"
+ val combined = prefix + middle + suffix
+ assert(combined.plain == "pre:X:post")
+ assert(combined.encode == s"pre:${decodeExpr("X")}:post")
+ }
+
+ test("repeated concatenation still renders correctly") {
+ val combined = pyb"a" + pyb"b" + pyb"c"
+ assert(combined.plain == "abc")
+ assert(combined.encode == "abc")
+ }
+
+ test("empty builder renders empty") {
+ val builder = pyb""
+ assert(builder.plain.isEmpty)
+ assert(builder.encode.isEmpty)
+ }
+
+ // ========================================================================
+ // Annotation / TYPE_USE behavior: where EncodableStringAnnotation is (and is not) picked up by the interpolator
+ // ========================================================================
+
+ test("TYPE_USE alias EncodableString triggers UI encoding") {
+ val uiText: EncodableString = "hello"
+ val builder = pyb"$uiText"
+ assert(builder.plain == "hello")
+ assert(builder.encode == decodeExpr("hello"))
+ }
+
+ test("EncodableString helper apply triggers UI encoding") {
+ val uiText: EncodableString = PyStringTypes.EncodableStringFactory("hey")
+ val builder = pyb"$uiText"
+ assert(builder.encode == decodeExpr("hey"))
+ }
+
+ test("TYPE_USE annotation on val type triggers UI encoding") {
+ val uiText: String @EncodableStringAnnotation = "typeuse"
+ val builder = pyb"$uiText"
+ assert(builder.encode == decodeExpr("typeuse"))
+ }
+
+ test("@StringUI parameter triggers UI encoding") {
+ def build(@EncodableStringAnnotation uiText: String): PythonTemplateBuilder = pyb"$uiText"
+ val builder = build("param")
+ assert(builder.encode == decodeExpr("param"))
+ }
+
+ test("@StringUI local val triggers UI encoding") {
+ def build(): PythonTemplateBuilder = {
+ @EncodableStringAnnotation val uiText: String = "local"
+ pyb"$uiText"
+ }
+ val builder = build()
+ assert(builder.encode == decodeExpr("local"))
+ }
+
+ test("@StringUI local val triggers UI encoding even when type is inferred") {
+ def build(): PythonTemplateBuilder = {
+ @EncodableStringAnnotation val uiText = "local-inferred"
+ pyb"$uiText"
+ }
+ val builder = build()
+ assert(builder.encode == decodeExpr("local-inferred"))
+ }
+
+ test("@StringUI lambda parameter triggers UI encoding") {
+ val uiToBuilder: (String @EncodableStringAnnotation) => PythonTemplateBuilder = uiText => pyb"$uiText"
+ val builder = uiToBuilder("lambda")
+ assert(builder.encode == decodeExpr("lambda"))
+ }
+
+ test("@StringUI lambda param + map + mkString triggers UI encoding per element") {
+ val rawItems = List("a", "b", "c")
+ val joinedEncoded =
+ rawItems.map((uiItem: String @EncodableStringAnnotation) => pyb"$uiItem").mkString("[", ", ", "]") // mkString goes through toString, which renders in encoded form
+ assert(joinedEncoded == s"[${rawItems.map(decodeExpr).mkString(", ")}]")
+ }
+
+ test("List[String @StringUI] element access preserves UI encoding") {
+ val uiItems: List[String @EncodableStringAnnotation] = List("first", "second")
+ val first = uiItems.head
+ val builder = pyb"$first"
+ assert(builder.encode == decodeExpr("first"))
+ }
+
+ test("Erasing List[String @StringUI] to List[String] drops UI encoding") {
+ val uiItems: List[String @EncodableStringAnnotation] = List("erased")
+ val erased: List[String] = uiItems.map((uiItem: String @EncodableStringAnnotation) => (uiItem: String))
+ val builder = pyb"${erased.head}"
+ assert(builder.encode == "erased")
+ }
+
+ test("@(StringUI @field) on case class field triggers UI encoding via accessor/field") {
+ final case class WithFieldAnnotation(@(EncodableStringAnnotation @field) uiText: String)
+ val value = WithFieldAnnotation("field")
+ val builder = pyb"${value.uiText}"
+ assert(builder.encode == decodeExpr("field"))
+ }
+
+ test("@StringUI on case class param without @field does not trigger UI encoding via accessor") {
+ final case class WithoutFieldAnnotation(@EncodableStringAnnotation uiText: String)
+ val value = WithoutFieldAnnotation("param-only")
+ val builder = pyb"${value.uiText}"
+ assert(builder.encode == "param-only")
+ }
+
+ test("unannotated String does not become UI (stays raw python)") {
+ val rawText: String = "raw"
+ val builder = pyb"$rawText"
+ assert(builder.encode == "raw")
+ }
+
+ test("StringPy alias remains raw") {
+ val rawText: PythonLiteral = "raw2"
+ val builder = pyb"$rawText"
+ assert(builder.encode == "raw2")
+ }
+
+ // ========================================================================
+ // Compile-time checks (direct UI args): snippets are compiled through ScalaTest's assertCompiles / assertDoesNotCompile
+ // ========================================================================
+
+ test("UI with whitespace boundaries compiles") {
+ assertCompiles("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiWhitespaceBoundariesOk { val ui: EncodableString = "x"; val b = pyb"foo $ui bar" }
+ """)
+ }
+
+ test("UI next to comma is allowed (common in function args)") {
+ assertCompiles("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiCommaOk { val ui: EncodableString = "x"; val b = pyb"f($ui, 1)" }
+ """)
+ }
+
+ test("UI next to parentheses is allowed") {
+ assertCompiles("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiParensOk { val ui: EncodableString = "x"; val b = pyb"($ui)" }
+ """)
+ }
+
+ test("hash inside quotes does not count as a comment marker (UI allowed afterwards)") {
+ assertCompiles("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object HashInQuotesOk { val ui: EncodableString = "x"; val b = pyb"print('#') $ui" }
+ """)
+ }
+
+ test("UI glued to identifier on the left does not compile") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiGluedLeftBad { val ui: EncodableString = "x"; val b = pyb"foo$ui" }
+ """)
+ }
+
+ test("UI glued to identifier on the right does not compile") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiGluedRightBad { val ui: EncodableString = "x"; val b = pyb"${ui}bar" }
+ """)
+ }
+
+ test("UI glued to a quote on the right does not compile") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiGluedQuoteBad { val ui: EncodableString = "x"; val b = pyb"${ui}'" }
+ """)
+ }
+
+ test("UI placed inside a quoted python string literal does not compile (single quotes)") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiInsideSingleQuotesBad { val ui: EncodableString = "x"; val b = pyb"print('${ui}')" }
+ """)
+ }
+
+ test("UI placed inside a quoted python string literal does not compile (double quotes)") { // NOTE(review): the raw snippet contains two backslashes before each inner quote; confirm it is rejected by the boundary check and not merely by how that literal lexes.
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiInsideDoubleQuotesBad {
+ val ui: EncodableString = "x"
+ val b = pyb"print(\\"${ui}\\")"
+ }
+ """)
+ }
+
+ test("UI placed after a python comment marker on same line does not compile") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiAfterCommentBad { val ui: EncodableString = "x"; val b = pyb"foo # ${ui}" }
+ """)
+ }
+
+ test("UI placed after a python comment marker on same line does not compile (no whitespace)") {
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PyStringTypes._
+ object UiAfterCommentNoSpaceBad { val ui: EncodableString = "x"; val b = pyb"foo #${ui}" }
+ """)
+ }
+
+ test("comment marker on previous line does not affect next line (lineTail behavior)") {
+ assertCompiles(
+ "import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._\n" +
+ "import org.apache.texera.amber.pybuilder.PyStringTypes._\n" +
+ "object CommentPrevLineOk {\n" +
+ " val ui: EncodableString = \"x\"\n" +
+ " val b = pyb\"\"\"|# comment\n" +
+ " |$ui\"\"\"\n" +
+ "}\n"
+ )
+ }
+
+
+ test("PyString (EncodableString) glued to identifier on the left does not compile") { // NOTE(review): the snippet imports PythonTemplateBuilder.EncodableString while the rest of this spec uses EncodableStringRenderer; if that member does not exist, the snippet fails for that reason alone — confirm.
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.EncodableString
+ object PyStringGluedLeftBad { val ui = EncodableString("x"); val b = pyb"foo${ui}" }
+ """)
+ }
+
+ test("PyString (EncodableString) inside a quoted python string literal does not compile") { // NOTE(review): same import concern as the previous test (PythonTemplateBuilder.EncodableString vs EncodableStringRenderer) — confirm.
+ assertDoesNotCompile("""
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.EncodableString
+ object PyStringInsideQuotesBad { val ui = EncodableString("x"); val b = pyb"print('${ui}')" }
+ """)
+ }
+
+ test("all isBadNeighbor characters reject direct UI adjacency at compile time (left + right)") {
+ val candidates = (33 to 126).map(_.toChar) // printable ASCII, avoids whitespace
+ val badChars = candidates.filter(PythonLexerUtils.isBadNeighbor)
+
+ // This is intentionally exhaustive over the implementation-defined "bad neighbor" set.
+ // We assert only compile success/failure, not the specific error message. Each character is injected through scalaUnicodeEscape.
+ badChars.zipWithIndex.foreach { case (ch, i) =>
+ val esc = scalaUnicodeEscape(ch)
+
+ val leftAdj =
+ s"""
+ |import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ |import org.apache.texera.amber.pybuilder.PyStringTypes._
+ |object UiBadLeft_$i {
+ | val ui: EncodableString = "x"
+ | val b = pyb\"\"\"pre$esc${'$'}{ui}post\"\"\"
+ |}
+ |""".stripMargin
+
+ val rightAdj =
+ s"""
+ |import org.apache.texera.amber.pybuilder.PythonTemplateBuilder._
+ |import org.apache.texera.amber.pybuilder.PyStringTypes._
+ |object UiBadRight_$i {
+ | val ui: EncodableString = "x"
+ | val b = pyb\"\"\"pre${'$'}{ui}$esc post\"\"\"
+ |}
+ |""".stripMargin
+
+ assertToolboxDoesNotCompile(leftAdj)
+ assertToolboxDoesNotCompile(rightAdj)
+ }
+ }
+
+ // ========================================================================
+ // Interpolator semantics / evaluation: args are evaluated when the builder is created, not when it is rendered
+ // ========================================================================
+
+ test("interpolated args are evaluated once and not re-evaluated on render") {
+ var evalCount = 0
+ def nextValue(): String = {
+ evalCount += 1
+ "v"
+ }
+
+ val builder = pyb"${nextValue()}"
+ assert(evalCount == 1)
+
+ builder.plain
+ assert(evalCount == 1)
+
+ builder.encode
+ assert(evalCount == 1)
+ }
+
+ // ========================================================================
+ // Nested PythonTemplateBuilder behavior (mode propagation + runtime UI checks that throw IllegalArgumentException)
+ // ========================================================================
+
+ test("nested PythonTemplateBuilder with UI propagates mode (plain)") {
+ val uiText = EncodableStringRenderer("Z")
+ val inner = pyb"X=$uiText"
+ val outer = pyb"pre $inner post"
+ assert(outer.plain == "pre X=Z post")
+ }
+
+ test("nested PythonTemplateBuilder with UI propagates mode (encoded)") {
+ val uiText = EncodableStringRenderer("Z")
+ val inner = pyb"X=$uiText"
+ val outer = pyb"pre $inner post"
+ assert(outer.encode == s"pre X=${decodeExpr("Z")} post")
+ }
+
+ test("nested PythonTemplateBuilder without UI can appear inside python quotes (no runtime checks)") {
+ val inner = pyb"hello"
+ val outer = pyb"print('$inner')"
+ assert(outer.plain == "print('hello')")
+ assert(outer.encode == "print('hello')")
+ }
+
+ test("containsUi detects UI chunks correctly") {
+ val rawBuilder = pyb"raw"
+ val uiBuilder = pyb"${EncodableStringRenderer("x")}"
+ val combined = rawBuilder + uiBuilder
+ assert(!rawBuilder.containsEncodableString)
+ assert(uiBuilder.containsEncodableString)
+ assert(combined.containsEncodableString)
+ }
+
+ test("nested PythonTemplateBuilder containing UI inside single quotes throws at runtime") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ intercept[IllegalArgumentException] {
+ pyb"print('$inner')"
+ }
+ }
+
+ test("nested PythonTemplateBuilder containing UI inside double quotes throws at runtime") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ intercept[IllegalArgumentException] {
+ pyb"""print("$inner")"""
+ }
+ }
+
+ test("nested PythonTemplateBuilder containing UI after comment marker throws at runtime (with and without whitespace)") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ intercept[IllegalArgumentException] {
+ pyb"foo # $inner"
+ }
+ intercept[IllegalArgumentException] {
+ pyb"foo #$inner"
+ }
+ }
+
+ test("nested PythonTemplateBuilder containing UI glued to identifier/digit throws at runtime") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ intercept[IllegalArgumentException] { pyb"foo$inner" }
+ intercept[IllegalArgumentException] { pyb"${inner}bar" }
+ intercept[IllegalArgumentException] { pyb"1$inner" }
+ intercept[IllegalArgumentException] { pyb"${inner}2" }
+ }
+
+ test("runtime guard does NOT throw when nested builder has no UI, even in unsafe boundary contexts") {
+ val inner = pyb"hello"
+ val outer1 = pyb"foo$inner"
+ val outer2 = pyb"${inner}bar"
+ val outer3 = pyb"print('$inner')"
+ val outer4 = pyb"foo #$inner"
+
+ assert(outer1.plain == "foohello")
+ assert(outer2.plain == "hellobar")
+ assert(outer3.plain == "print('hello')")
+ assert(outer4.plain == "foo #hello")
+ }
+
+ test("nested PythonTemplateBuilder containing UI with safe whitespace boundaries is allowed") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ val outer = pyb"foo $inner bar"
+ assert(outer.plain == "foo x bar")
+ assert(outer.encode == s"foo ${decodeExpr("x")} bar")
+ }
+
+ test("nested PythonTemplateBuilder containing UI next to punctuation is allowed") {
+ val inner = pyb"${EncodableStringRenderer("x")}"
+ val outer = pyb"f($inner, 1)"
+ assert(outer.plain == "f(x, 1)")
+ assert(outer.encode == s"f(${decodeExpr("x")}, 1)")
+ }
+
+ test("stripMargin works across nested builders") {
+ val inner =
+ pyb"""A
+ |B"""
+ val outer =
+ pyb"""|start
+ |$inner
+ |end"""
+ assert(outer.plain == "start\nA\nB\nend")
+ }
+
+ test("""format(): EncodableString arg after closing quote is allowed""") {
+ val workflowParam = "wf"
+ val portParam = PythonTemplateBuilder.EncodableStringRenderer("P")
+
+ val builder = pyb""""$workflowParam".format($portParam)"""
+ assert(builder.plain == "\"wf\".format(P)")
+ assert(builder.encode.contains("self.decode_python_template("))
+ }
+
+ test("format(): nested PythonTemplateBuilder containing UI is allowed (no runtime false positive)") {
+ val workflowParam = "wf"
+ val portParam = pyb"int (${PythonTemplateBuilder.EncodableStringRenderer("\\.")}),"
+
+ val builder = pyb""""$workflowParam".format($portParam)"""
+ assert(builder.plain.contains("format(int (\\.),"))
+ assert(builder.encode.contains("self.decode_python_template("))
+ }
+
+ test("still rejects nested UI builder inside Python quotes at runtime") {
+ val portParam = pyb"${PythonTemplateBuilder.EncodableStringRenderer("P")}"
+
+ intercept[IllegalArgumentException] {
+ pyb"print('${portParam}')".plain
+ }
+ }
+}
diff --git a/common/workflow-operator/build.sbt b/common/workflow-operator/build.sbt
index 9f7f5b22a47..6af8b3c6ae1 100644
--- a/common/workflow-operator/build.sbt
+++ b/common/workflow-operator/build.sbt
@@ -116,3 +116,5 @@ libraryDependencies ++= Seq(
"org.apache.lucene" % "lucene-analyzers-common" % "8.11.4",
"io.github.redouane59.twitter" % "twittered" % "2.21"
)
+
+libraryDependencies += "io.github.classgraph" % "classgraph" % "4.8.184" % Test
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceIrisLogisticRegressionOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceIrisLogisticRegressionOpDesc.scala
index 81ae9e03df5..9a9ac563250 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceIrisLogisticRegressionOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceIrisLogisticRegressionOpDesc.scala
@@ -21,21 +21,24 @@ package org.apache.texera.amber.operator.huggingFace
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+
class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "petalLengthCmAttribute", required = true)
@JsonPropertyDescription("attribute in your dataset corresponding to PetalLengthCm")
@AutofillAttributeName
- var petalLengthCmAttribute: String = _
+ var petalLengthCmAttribute: EncodableString = _
@JsonProperty(value = "petalWidthCmAttribute", required = true)
@JsonPropertyDescription("attribute in your dataset corresponding to PetalWidthCm")
@AutofillAttributeName
- var petalWidthCmAttribute: String = _
+ var petalWidthCmAttribute: EncodableString = _
@JsonProperty(
value = "prediction class name",
@@ -43,7 +46,7 @@ class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
defaultValue = "Species_prediction"
)
@JsonPropertyDescription("output attribute name for the predicted class of species")
- var predictionClassName: String = _
+ var predictionClassName: EncodableString = _
@JsonProperty(
value = "prediction probability name",
@@ -53,7 +56,7 @@ class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription(
"output attribute name for the prediction's probability of being a Iris-setosa"
)
- var predictionProbabilityName: String = _
+ var predictionProbabilityName: EncodableString = _
/**
* Python code to apply a pre-trained liner regression model on the Iris dataset.
@@ -62,7 +65,7 @@ class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
* @return a String representation of the executable Python source code.
*/
override def generatePythonCode(): String = {
- s"""from pytexera import *
+ pyb"""from pytexera import *
|import numpy as np
|import torch
|import torch.nn as nn
@@ -86,8 +89,8 @@ class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
| def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:
| training_features_means = [3.72666667, 1.17619048]
| training_features_stds = [1.72528903, 0.73788937]
- | length = tuple_["$petalLengthCmAttribute"]
- | width = tuple_["$petalWidthCmAttribute"]
+ | length = tuple_[$petalLengthCmAttribute]
+ | width = tuple_[$petalWidthCmAttribute]
| features = np.array([[length, width]])
| features = ((features - training_features_means) / training_features_stds)
| features = torch.from_numpy(features).float()
@@ -95,9 +98,9 @@ class HuggingFaceIrisLogisticRegressionOpDesc extends PythonOperatorDescriptor {
| logits = self.model(features)
| proba = torch.sigmoid(logits.squeeze())
| preds = (proba > 0.5).long()
- | tuple_["$predictionProbabilityName"] = float(proba)
- | tuple_["$predictionClassName"] = "Iris-setosa" if preds == 1 else "Not Iris-setosa"
- | yield tuple_""".stripMargin
+ | tuple_[$predictionProbabilityName] = float(proba)
+ | tuple_[$predictionClassName] = "Iris-setosa" if preds == 1 else "Not Iris-setosa"
+ | yield tuple_""".encode
}
override def operatorInfo: OperatorInfo =
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala
index 551b25b4810..1d6cc7be9c5 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSentimentAnalysisOpDesc.scala
@@ -25,11 +25,13 @@ import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentit
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "attribute", required = true)
@JsonPropertyDescription("column to perform sentiment analysis on")
@AutofillAttributeName
- var attribute: String = _
+ var attribute: EncodableString = _
@JsonProperty(
value = "Positive result attribute",
@@ -37,7 +39,7 @@ class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor {
defaultValue = "huggingface_sentiment_positive"
)
@JsonPropertyDescription("column name of the sentiment analysis result (positive)")
- var resultAttributePositive: String = _
+ var resultAttributePositive: EncodableString = _
@JsonProperty(
value = "Neutral result attribute",
@@ -45,7 +47,7 @@ class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor {
defaultValue = "huggingface_sentiment_neutral"
)
@JsonPropertyDescription("column name of the sentiment analysis result (neutral)")
- var resultAttributeNeutral: String = _
+ var resultAttributeNeutral: EncodableString = _
@JsonProperty(
value = "Negative result attribute",
@@ -53,10 +55,10 @@ class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor {
defaultValue = "huggingface_sentiment_negative"
)
@JsonPropertyDescription("column name of the sentiment analysis result (negative)")
- var resultAttributeNegative: String = _
+ var resultAttributeNegative: EncodableString = _
override def generatePythonCode(): String = {
- s"""from pytexera import *
+ pyb"""from pytexera import *
|from transformers import pipeline
|from transformers import AutoModelForSequenceClassification
|from transformers import TFAutoModelForSequenceClassification
@@ -74,16 +76,16 @@ class HuggingFaceSentimentAnalysisOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:
- | encoded_input = self.tokenizer(tuple_["$attribute"], return_tensors='pt')
+ | encoded_input = self.tokenizer(tuple_[$attribute], return_tensors='pt')
| output = self.model(**encoded_input)
| scores = softmax(output[0][0].detach().numpy())
| ranking = np.argsort(scores)[::-1]
- | labels = {"positive": "$resultAttributePositive", "neutral": "$resultAttributeNeutral", "negative": "$resultAttributeNegative"}
+ | labels = {"positive": $resultAttributePositive, "neutral": $resultAttributeNeutral, "negative": $resultAttributeNegative}
| for i in range(scores.shape[0]):
| label = labels[self.config.id2label[ranking[i]]]
| score = scores[ranking[i]]
| tuple_[label] = np.round(float(score), 4)
- | yield tuple_""".stripMargin
+ | yield tuple_""".encode
}
override def operatorInfo: OperatorInfo =
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSpamSMSDetectionOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSpamSMSDetectionOpDesc.scala
index d5fdb24deba..cef9525570e 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSpamSMSDetectionOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceSpamSMSDetectionOpDesc.scala
@@ -25,11 +25,13 @@ import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentit
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
class HuggingFaceSpamSMSDetectionOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "attribute", required = true)
@JsonPropertyDescription("column to perform spam detection on")
@AutofillAttributeName
- var attribute: String = _
+ var attribute: EncodableString = _
@JsonProperty(
value = "Spam result attribute",
@@ -37,7 +39,7 @@ class HuggingFaceSpamSMSDetectionOpDesc extends PythonOperatorDescriptor {
defaultValue = "is_spam"
)
@JsonPropertyDescription("column name of whether spam or not")
- var resultAttributeSpam: String = _
+ var resultAttributeSpam: EncodableString = _
@JsonProperty(
value = "Score result attribute",
@@ -45,10 +47,10 @@ class HuggingFaceSpamSMSDetectionOpDesc extends PythonOperatorDescriptor {
defaultValue = "score"
)
@JsonPropertyDescription("column name of Probability for classification")
- var resultAttributeProbability: String = _
+ var resultAttributeProbability: EncodableString = _
override def generatePythonCode(): String = {
- s"""from transformers import pipeline
+ pyb"""from transformers import pipeline
|from pytexera import *
|
|class ProcessTupleOperator(UDFOperatorV2):
@@ -58,10 +60,10 @@ class HuggingFaceSpamSMSDetectionOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:
- | result = self.pipeline(tuple_["$attribute"])[0]
- | tuple_["$resultAttributeSpam"] = (result["label"] == "LABEL_1")
- | tuple_["$resultAttributeProbability"] = result["score"]
- | yield tuple_""".stripMargin
+ | result = self.pipeline(tuple_[$attribute])[0]
+ | tuple_[$resultAttributeSpam] = (result["label"] == "LABEL_1")
+ | tuple_[$resultAttributeProbability] = result["score"]
+ | yield tuple_""".encode
}
override def operatorInfo: OperatorInfo =
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceTextSummarizationOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceTextSummarizationOpDesc.scala
index 41d16c5c1b4..6b5c5449b42 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceTextSummarizationOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/huggingFace/HuggingFaceTextSummarizationOpDesc.scala
@@ -25,11 +25,13 @@ import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentit
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
class HuggingFaceTextSummarizationOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "attribute", required = true)
@JsonPropertyDescription("attribute to perform text summarization on")
@AutofillAttributeName
- var attribute: String = _
+ var attribute: EncodableString = _
@JsonProperty(
value = "Result attribute name",
@@ -37,10 +39,10 @@ class HuggingFaceTextSummarizationOpDesc extends PythonOperatorDescriptor {
defaultValue = "summary"
)
@JsonPropertyDescription("attribute name of the text summary result")
- var resultAttribute: String = _
+ var resultAttribute: EncodableString = _
override def generatePythonCode(): String = {
- s"""
+ pyb"""
|from transformers import BertTokenizerFast, EncoderDecoderModel
|import torch
|from pytexera import *
@@ -55,7 +57,7 @@ class HuggingFaceTextSummarizationOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:
- | text = tuple_["$attribute"]
+ | text = tuple_[$attribute]
|
| inputs = self.tokenizer([text], padding="max_length", truncation=True, max_length=512, return_tensors="pt")
| input_ids = inputs.input_ids.to(self.device)
@@ -63,8 +65,8 @@ class HuggingFaceTextSummarizationOpDesc extends PythonOperatorDescriptor {
|
| output = self.model.generate(input_ids, attention_mask=attention_mask)
| summary = self.tokenizer.decode(output[0], skip_special_tokens=True)
- | tuple_["$resultAttribute"] = summary
- | yield tuple_""".stripMargin
+ | tuple_[$resultAttribute] = summary
+ | yield tuple_""".encode
}
override def operatorInfo: OperatorInfo =
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDesc.scala
index 0020b547aee..a2f72a513ed 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDesc.scala
@@ -26,6 +26,8 @@ import com.kjetland.jackson.jsonSchema.annotations.{
JsonSchemaTitle
}
import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{AutofillAttributeName, HideAnnotation}
@@ -43,13 +45,13 @@ class MachineLearningScorerOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Actual Value")
@JsonPropertyDescription("Specify the label attribute")
@AutofillAttributeName
- var actualValueColumn: String = ""
+ var actualValueColumn: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Predicted Value")
@JsonPropertyDescription("Specify the attribute generated by the model")
@AutofillAttributeName
- var predictValueColumn: String = ""
+ var predictValueColumn: EncodableString = ""
@JsonProperty(required = false, value = "classificationFlag")
@JsonSchemaTitle("Scorer Functions")
@@ -113,14 +115,14 @@ class MachineLearningScorerOpDesc extends PythonOperatorDescriptor {
// scorer.getName()
// }
- private def getMetricName(metric: Any): String =
+ private def getMetricName(metric: Any): EncodableString = // getName() of a regression or classification metric; any other type throws IllegalArgumentException
metric match {
case m: regressionMetricsFnc => m.getName()
case m: classificationMetricsFnc => m.getName()
case _ => throw new IllegalArgumentException("Unknown metric type")
}
- private def getSelectedMetrics(): String = {
+ private def getSelectedMetrics(): EncodableString = {
// Return a string of metrics using the getEachScorerName() method
val metric = if (isRegression) regressionMetrics else classificationMetrics
metric.map(metric => getMetricName(metric)).mkString("'", "','", "'")
@@ -129,7 +131,7 @@ class MachineLearningScorerOpDesc extends PythonOperatorDescriptor {
override def generatePythonCode(): String = {
val isRegressionStr = if (isRegression) "True" else "False"
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|import pandas as pd
|from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, root_mean_squared_error, mean_absolute_error, r2_score
@@ -172,8 +174,8 @@ class MachineLearningScorerOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | y_true = table['$actualValueColumn']
- | y_pred = table['$predictValueColumn']
+ | y_true = table[$actualValueColumn]
+ | y_pred = table[$predictValueColumn]
|
| metric_list = [${getSelectedMetrics()}]
|
@@ -185,8 +187,8 @@ class MachineLearningScorerOpDesc extends PythonOperatorDescriptor {
| result = classification_metrics(y_true, y_pred, metric_list, labels)
|
| yield result
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParameters.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParameters.scala
index f947ee69488..13fdb9aa60f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParameters.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParameters.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations._
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.{
CommonOpDescAnnotation,
HideAnnotation
@@ -48,7 +49,7 @@ class HyperParameters[T] {
)
)
@JsonProperty(value = "attribute")
- var attribute: String = _
+ var attribute: EncodableString = _
@JsonSchemaInject(
strings = Array(
@@ -59,7 +60,7 @@ class HyperParameters[T] {
bools = Array(new JsonSchemaBool(path = HideAnnotation.hideOnNull, value = true))
)
@JsonProperty(value = "value")
- var value: String = _
+ var value: EncodableString = _
@JsonProperty(defaultValue = "false")
@JsonSchemaTitle("Workflow")
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnAdvancedBaseDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnAdvancedBaseDesc.scala
index 189eb0be799..3127fa91232 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnAdvancedBaseDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnAdvancedBaseDesc.scala
@@ -22,6 +22,8 @@ package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base
import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{
@@ -29,6 +31,7 @@ import org.apache.texera.amber.operator.metadata.annotations.{
AutofillAttributeNameList
}
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
trait ParamClass {
def getName: String
@@ -50,64 +53,53 @@ abstract class SklearnMLOperatorDescriptor[T <: ParamClass] extends PythonOperat
@JsonSchemaTitle("Ground Truth Attribute Column")
@JsonPropertyDescription("Ground truth attribute column")
@AutofillAttributeName
- var groundTruthAttribute: String = ""
+ var groundTruthAttribute: EncodableString = ""
@JsonProperty(value = "Selected Features", required = true)
@JsonSchemaTitle("Selected Features")
@JsonPropertyDescription("Features used to train the model")
@AutofillAttributeNameList
- var selectedFeatures: List[String] = _
+ var selectedFeatures: List[EncodableString] = _
- private def getLoopTimes(paraList: List[HyperParameters[T]]): String = {
+ private def getLoopTimes(paraList: List[HyperParameters[T]]): PythonTemplateBuilder = { // Python expression used as the loop count
for (ele <- paraList) {
if (ele.parametersSource) {
- return s"""table[\"${ele.attribute}\"].values.shape[0]"""
+ return pyb"""table[${ele.attribute}].values.shape[0]""" // first entry with parametersSource set decides: row count of its attribute column
}
}
- "1"
+ pyb"1" // no entry has parametersSource set: the literal 1
}
- def getParameter(paraList: List[HyperParameters[T]]): List[String] = {
- var workflowParam = "";
- var portParam = "";
- var paramString = ""
+ def getParameter(paraList: List[HyperParameters[T]]): List[PythonTemplateBuilder] = { // builds two Python fragments from the hyper-parameter list
+ var workflowParam = "" // "<name> = {}," placeholders, plain text only
+ var portParam = pyb"" // arguments later passed to .format(...)
+ var paramString = pyb"" // "<name> = <type>(...)," pairs
for (ele <- paraList) {
if (ele.parametersSource) {
- workflowParam = workflowParam + String.format("%s = {},", ele.parameter.getName)
+ workflowParam = s"$workflowParam${ele.parameter.getName} = {},"
portParam =
- portParam + String.format(
- "%s(table['%s'].values[i]),",
- ele.parameter.getType,
- ele.attribute
- )
- paramString = paramString + String.format(
- "%s = %s(table['%s'].values[i]),",
- ele.parameter.getName,
- ele.parameter.getType,
- ele.attribute
- )
+ pyb"$portParam${ele.parameter.getType}(table[${ele.attribute}].values[i])," // nested interpolation, same form as the else-branch, instead of `+`
+ paramString =
+ pyb"$paramString${ele.parameter.getName} = ${ele.parameter.getType}(table[${ele.attribute}].values[i]),"
} else {
- workflowParam = workflowParam + String.format("%s = {},", ele.parameter.getName)
- portParam = portParam + String.format("%s ('%s'),", ele.parameter.getType, ele.value)
- paramString = paramString + String.format(
- "%s = %s ('%s'),",
- ele.parameter.getName,
- ele.parameter.getType,
- ele.value
- )
+ workflowParam = s"$workflowParam${ele.parameter.getName} = {},"
+ portParam = pyb"$portParam${ele.parameter.getType} (${ele.value}),"
+ paramString =
+ pyb"$paramString${ele.parameter.getName} = ${ele.parameter.getType} (${ele.value}),"
}
}
- List(String.format("\"%s\".format(%s)", workflowParam, portParam), paramString)
+ List(pyb""""$workflowParam".format($portParam)""", paramString) // index 0: "...".format(...) expression, index 1: the name = value pairs
+
}
override def generatePythonCode(): String = {
- val listFeatures = selectedFeatures.map(feature => s""""$feature"""").mkString(",")
+ val listFeatures = selectedFeatures.map(feature => pyb"""$feature""").mkString(",")
val trainingName = getImportStatements.split(" ").last
val stringList = getParameter(paraList)
val trainingParam = stringList(1)
val paramString = stringList(0)
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import pandas as pd
@@ -125,7 +117,7 @@ abstract class SklearnMLOperatorDescriptor[T <: ParamClass] extends PythonOperat
| self.dataset = table
|
| if port == 1 :
- | y_train = self.dataset["$groundTruthAttribute"]
+ | y_train = self.dataset[$groundTruthAttribute]
| X_train = self.dataset[features]
| loop_times = ${getLoopTimes(paraList)}
|
@@ -143,8 +135,8 @@ abstract class SklearnMLOperatorDescriptor[T <: ParamClass] extends PythonOperat
| df = pd.DataFrame(data)
| yield df
|
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
override def operatorInfo: OperatorInfo = {
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnClassifierOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnClassifierOpDesc.scala
index 3fcea191e18..0c8a103c52b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnClassifierOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnClassifierOpDesc.scala
@@ -27,6 +27,8 @@ import com.kjetland.jackson.jsonSchema.annotations.{
JsonSchemaTitle
}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{
@@ -42,7 +44,7 @@ abstract class SklearnClassifierOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("Attribute in your dataset corresponding to target.")
@JsonProperty(required = true)
@AutofillAttributeName
- var target: String = _
+ var target: EncodableString = _
@JsonSchemaTitle("Count Vectorizer")
@JsonPropertyDescription("Convert a collection of text documents to a matrix of token counts.")
@@ -65,7 +67,7 @@ abstract class SklearnClassifierOpDesc extends PythonOperatorDescriptor {
new JsonSchemaInt(path = CommonOpDescAnnotation.autofillAttributeOnPort, value = 0)
)
)
- var text: String = _
+ var text: EncodableString = _
@JsonSchemaTitle("Tfidf Transformer")
@JsonPropertyDescription("Transform a count matrix to a normalized tf or tf-idf representation.")
@@ -86,7 +88,7 @@ abstract class SklearnClassifierOpDesc extends PythonOperatorDescriptor {
def getUserFriendlyModelName = ""
override def generatePythonCode(): String =
- s"""$getImportStatements
+ pyb"""$getImportStatements
|from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
|from sklearn.pipeline import make_pipeline
|from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
@@ -95,9 +97,9 @@ abstract class SklearnClassifierOpDesc extends PythonOperatorDescriptor {
|class ProcessTableOperator(UDFTableOperator):
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | Y = table["$target"]
- | X = table.drop("$target", axis=1)
- | X = ${if (countVectorizer) "X['" + text + "']" else "X"}
+ | Y = table[$target]
+ | X = table.drop($target, axis=1)
+ | X = ${if (countVectorizer) pyb"X[$text]" else "X"}
| if port == 0:
| self.model = make_pipeline(${if (countVectorizer) "CountVectorizer(),"
else ""} ${if (tfidfTransformer) "TfidfTransformer()," else ""} ${getImportStatements
@@ -111,7 +113,7 @@ abstract class SklearnClassifierOpDesc extends PythonOperatorDescriptor {
| recalls = recall_score(Y, predictions, average=None)
| for i, class_name in enumerate(np.unique(Y)):
| print("Class", repr(class_name), " - F1:", round(f1s[i], 4), ", Precision:", round(precisions[i], 4), ", Recall:", round(recalls[i], 4))
- | yield {"model_name" : "$getUserFriendlyModelName", "model" : self.model}""".stripMargin
+ | yield {"model_name" : "$getUserFriendlyModelName", "model" : self.model}""".encode
override def operatorInfo: OperatorInfo =
OperatorInfo(
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnLinearRegressionOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnLinearRegressionOpDesc.scala
index 1c4c7e6288c..f99da2bff47 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnLinearRegressionOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnLinearRegressionOpDesc.scala
@@ -22,6 +22,8 @@ package org.apache.texera.amber.operator.sklearn
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -33,7 +35,7 @@ class SklearnLinearRegressionOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("Attribute in your dataset corresponding to target.")
@JsonProperty(required = true)
@AutofillAttributeName
- var target: String = _
+ var target: EncodableString = _
@JsonSchemaTitle("Degree")
@JsonPropertyDescription("Degree of polynomial function")
@@ -41,7 +43,7 @@ class SklearnLinearRegressionOpDesc extends PythonOperatorDescriptor {
val degree: Int = 1
override def generatePythonCode(): String =
- s"""
+ pyb"""
|from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, mean_absolute_error, r2_score
|from sklearn.pipeline import make_pipeline
|from sklearn.linear_model import LinearRegression
@@ -51,8 +53,8 @@ class SklearnLinearRegressionOpDesc extends PythonOperatorDescriptor {
|class ProcessTableOperator(UDFTableOperator):
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | Y = table["$target"]
- | X = table.drop("$target", axis=1)
+ | Y = table[$target]
+ | X = table.drop($target, axis=1)
| if port == 0:
| pipeline = make_pipeline(
| PolynomialFeatures(degree=$degree),
@@ -64,7 +66,7 @@ class SklearnLinearRegressionOpDesc extends PythonOperatorDescriptor {
| mae = round(mean_absolute_error(Y, predictions), 4)
| r2 = round(r2_score(Y, predictions), 4)
| print("MAE:", mae, ", R2:", r2)
- | yield {"model_name" : "LinearRegression", "model" : self.model}""".stripMargin
+ | yield {"model_name" : "LinearRegression", "model" : self.model}""".encode
override def operatorInfo: OperatorInfo =
OperatorInfo(
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnPredictionOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnPredictionOpDesc.scala
index 0e0a5772f36..92557fa78df 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnPredictionOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/SklearnPredictionOpDesc.scala
@@ -21,6 +21,8 @@ package org.apache.texera.amber.operator.sklearn
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{
@@ -33,11 +35,11 @@ class SklearnPredictionOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "Model Attribute", required = true, defaultValue = "model")
@JsonPropertyDescription("attribute corresponding to ML model")
@AutofillAttributeName
- var model: String = _
+ var model: EncodableString = _
@JsonProperty(value = "Output Attribute Name", required = true, defaultValue = "prediction")
@JsonPropertyDescription("attribute name of the prediction result")
- var resultAttribute: String = _
+ var resultAttribute: EncodableString = _
@JsonProperty(
value = "Ground Truth Attribute Name to Ignore",
@@ -46,24 +48,24 @@ class SklearnPredictionOpDesc extends PythonOperatorDescriptor {
)
@JsonPropertyDescription("attribute name of the ground truth")
@AutofillAttributeNameOnPort1
- var groundTruthAttribute: String = ""
+ var groundTruthAttribute: EncodableString = ""
override def generatePythonCode(): String =
- s"""from pytexera import *
+ pyb"""from pytexera import *
|from sklearn.pipeline import Pipeline
|class ProcessTupleOperator(UDFOperatorV2):
| @overrides
| def process_tuple(self, tuple_: Tuple, port: int) -> Iterator[Optional[TupleLike]]:
| if port == 0:
- | self.model = tuple_["$model"]
+ | self.model = tuple_[$model]
| else:
| input_features = tuple_
- | if "$groundTruthAttribute" != "":
- | input_features = input_features.get_partial_tuple([col for col in tuple_.get_field_names() if col != "$groundTruthAttribute"])
- | tuple_["$resultAttribute"] = type(tuple_["$groundTruthAttribute"])(self.model.predict(Table.from_tuple_likes([input_features]))[0])
+ | if $groundTruthAttribute != "":
+ | input_features = input_features.get_partial_tuple([col for col in tuple_.get_field_names() if col != $groundTruthAttribute])
+ | tuple_[$resultAttribute] = type(tuple_[$groundTruthAttribute])(self.model.predict(Table.from_tuple_likes([input_features]))[0])
| else:
- | tuple_["$resultAttribute"] = str(self.model.predict(Table.from_tuple_likes([input_features]))[0])
- | yield tuple_""".stripMargin
+ | tuple_[$resultAttribute] = str(self.model.predict(Table.from_tuple_likes([input_features]))[0])
+ | yield tuple_""".encode
override def operatorInfo: OperatorInfo =
OperatorInfo(
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingOpDesc.scala
index c842ffafac3..c0d1fd3a511 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sklearn/training/SklearnTrainingOpDesc.scala
@@ -27,6 +27,8 @@ import com.kjetland.jackson.jsonSchema.annotations.{
JsonSchemaTitle
}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{
@@ -42,7 +44,7 @@ class SklearnTrainingOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("Attribute in your dataset corresponding to target.")
@JsonProperty(required = true)
@AutofillAttributeName
- var target: String = _
+ var target: EncodableString = _
@JsonSchemaTitle("Count Vectorizer")
@JsonPropertyDescription("Convert a collection of text documents to a matrix of token counts.")
@@ -65,7 +67,7 @@ class SklearnTrainingOpDesc extends PythonOperatorDescriptor {
new JsonSchemaInt(path = CommonOpDescAnnotation.autofillAttributeOnPort, value = 0)
)
)
- var text: String = _
+ var text: EncodableString = _
@JsonSchemaTitle("Tfidf Transformer")
@JsonPropertyDescription("Transform a count matrix to a normalized tf or tf-idf representation.")
@@ -86,7 +88,7 @@ class SklearnTrainingOpDesc extends PythonOperatorDescriptor {
def getUserFriendlyModelName = "RandomForest Training"
override def generatePythonCode(): String =
- s"""$getImportStatements
+ pyb"""$getImportStatements
|from sklearn.pipeline import make_pipeline
|from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|import numpy as np
@@ -94,16 +96,16 @@ class SklearnTrainingOpDesc extends PythonOperatorDescriptor {
|class ProcessTableOperator(UDFTableOperator):
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | Y = table["$target"]
- | X = table.drop("$target", axis=1)
- | X = ${if (countVectorizer) "X['" + text + "']" else "X"}
+ | Y = table[$target]
+ | X = table.drop($target, axis=1)
+ | X = ${if (countVectorizer) pyb"X[$text]" else pyb"X"}
| model = make_pipeline(${if (countVectorizer) "CountVectorizer()," else ""} ${if (
tfidfTransformer
) "TfidfTransformer(),"
else ""} ${getImportStatements.split(" ").last}()).fit(X, Y)
| yield {"model_name" : "$getUserFriendlyModelName", "model" : model}
|
- | """.stripMargin
+ | """.encode
override def operatorInfo: OperatorInfo =
OperatorInfo(
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortCriteriaUnit.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortCriteriaUnit.scala
index b4bae266ef9..f0e851110b8 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortCriteriaUnit.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortCriteriaUnit.scala
@@ -20,6 +20,7 @@
package org.apache.texera.amber.operator.sort
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
class SortCriteriaUnit {
@@ -27,7 +28,7 @@ class SortCriteriaUnit {
@JsonProperty(value = "attribute", required = true)
@JsonPropertyDescription("Attribute name to sort by")
@AutofillAttributeName
- var attributeName: String = _
+ var attributeName: EncodableString = _
@JsonProperty(value = "sortPreference", required = true)
@JsonPropertyDescription("Sort preference (ASC or DESC)")
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortOpDesc.scala
index 2e46fb81333..0825422e6c1 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/sort/SortOpDesc.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.sort
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import org.apache.texera.amber.core.tuple.Schema
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
@@ -32,7 +33,7 @@ class SortOpDesc extends PythonOperatorDescriptor {
override def generatePythonCode(): String = {
val attributeName = "[" + attributes
.map { criteria =>
- s""""${criteria.attributeName}""""
+ pyb"""${criteria.attributeName}"""
}
.mkString(", ") + "]"
val sortOrders: String = "[" + attributes
@@ -44,7 +45,7 @@ class SortOpDesc extends PythonOperatorDescriptor {
}
.mkString(", ") + "]"
- s"""from pytexera import *
+ pyb"""from pytexera import *
|import pandas as pd
|from datetime import datetime
|
@@ -56,7 +57,7 @@ class SortOpDesc extends PythonOperatorDescriptor {
| ascending_orders = $sortOrders
|
| sorted_df = table.sort_values(by=sort_columns, ascending=ascending_orders)
- | yield sorted_df""".stripMargin
+ | yield sorted_df""".encode
}
def getOutputSchemas(inputSchemas: Map[PortIdentity, Schema]): Map[PortIdentity, Schema] = {
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDesc.scala
index adaef827180..85cf30bf06b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDesc.scala
@@ -22,6 +22,8 @@ package org.apache.texera.amber.operator.source.apis.reddit
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{OutputPort, PortIdentity}
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
import org.apache.texera.amber.operator.source.PythonSourceOperatorDescriptor
@@ -30,17 +32,17 @@ class RedditSearchSourceOpDesc extends PythonSourceOperatorDescriptor {
@JsonProperty(required = true)
@JsonSchemaTitle("Client Id")
@JsonPropertyDescription("Client id that uses to access Reddit API")
- var clientId: String = _
+ var clientId: EncodableString = _
@JsonProperty(required = true)
@JsonSchemaTitle("Client Secret")
@JsonPropertyDescription("Client secret that uses to access Reddit API")
- var clientSecret: String = _
+ var clientSecret: EncodableString = _
@JsonProperty(required = true)
@JsonSchemaTitle("Query")
@JsonPropertyDescription("Search query")
- var query: String = _
+ var query: EncodableString = _
@JsonProperty(required = true, defaultValue = "100")
@JsonSchemaTitle("Limit")
@@ -53,20 +55,20 @@ class RedditSearchSourceOpDesc extends PythonSourceOperatorDescriptor {
var sorting: RedditSourceOperatorFunction = _
override def generatePythonCode(): String = {
- val clientIdReal = this.clientId.replace("\n", "").trim
- val clientSecretReal = this.clientSecret.replace("\n", "").trim
- val queryReal = this.query.replace("\n", "").trim
+ val clientIdReal: EncodableString = this.clientId.replace("\n", "").trim
+ val clientSecretReal: EncodableString = this.clientSecret.replace("\n", "").trim
+ val queryReal: EncodableString = this.query.replace("\n", "").trim
- s"""from pytexera import *
+ pyb"""from pytexera import *
|import praw
|from datetime import datetime
|
|class ProcessTupleOperator(UDFSourceOperator):
- | client_id = '$clientIdReal'
- | client_secret = '$clientSecretReal'
+ | client_id = $clientIdReal
+ | client_secret = $clientSecretReal
| limit = $limit
- | query = '$queryReal'
- | sorting = '${sorting.getName}'
+ | query = $queryReal
+ | sorting = '${sorting.getName}'
|
| @overrides
| def produce(self) -> Iterator[Union[TupleLike, TableLike, None]]:
@@ -116,7 +118,7 @@ class RedditSearchSourceOpDesc extends PythonSourceOperatorDescriptor {
| 'author_name': author.name,
| 'subreddit': subreddit
| })
- | yield tuple_submission""".stripMargin
+ | yield tuple_submission""".encode
}
override def operatorInfo: OperatorInfo =
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesPlot.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesPlot.scala
index aae1c339331..0fdcb09a5cb 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesPlot.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesPlot.scala
@@ -22,6 +22,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -36,7 +38,7 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The column containing time/date values (e.g., Date, Timestamp).")
@AutofillAttributeName
@NotNull(message = "Time Column cannot be empty")
- var timeColumn: String = ""
+ var timeColumn: EncodableString = ""
@JsonProperty(value = "valueColumn", required = true)
@JsonSchemaTitle("Value Column")
@@ -44,19 +46,19 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
@JsonSchemaInject(json = """{"enum": "autofill"}""")
@AutofillAttributeName
@NotNull(message = "Value Column cannot be empty")
- var valueColumn: String = ""
+ var valueColumn: EncodableString = ""
@JsonProperty(value = "categoryColumn", required = false, defaultValue = "No Selection")
@JsonSchemaTitle("Category Column")
@JsonPropertyDescription("Optional - A categorical column to create separate lines.")
@AutofillAttributeName
- var CategoryColumn: String = "No Selection"
+ var CategoryColumn: EncodableString = "No Selection"
@JsonProperty(value = "facetColumn", required = false, defaultValue = "No Selection")
@JsonSchemaTitle("Facet Column")
@JsonPropertyDescription("Optional - A column to create separate subplots.")
@AutofillAttributeName
- var facetColumn: String = "No Selection"
+ var facetColumn: EncodableString = "No Selection"
@JsonProperty(value = "line", defaultValue = "line", required = true)
@JsonSchemaTitle("Plot Type")
@@ -89,14 +91,14 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
val dropnaCols = List(timeColumn, valueColumn) ++
(if (CategoryColumn != "No Selection") Some(CategoryColumn) else None) ++
(if (facetColumn != "No Selection") Some(facetColumn) else None)
- val dropnaStr = dropnaCols.map(c => s"'$c'").mkString("[", ", ", "]")
+ val dropnaStr = dropnaCols.map(c => pyb"$c").mkString("[", ", ", "]")
- val colorArg = if (CategoryColumn != "No Selection") s", color='$CategoryColumn'" else ""
- val facetArg = if (facetColumn != "No Selection") s", facet_col='$facetColumn'" else ""
+ val colorArg = if (CategoryColumn != "No Selection") pyb", color=$CategoryColumn" else ""
+ val facetArg = if (facetColumn != "No Selection") pyb", facet_col=$facetColumn" else ""
val plotFunc = if (plotType == "area") "px.area" else "px.line"
val showSlider = if (showRangeSlider) "True" else "False"
- s"""
+ pyb"""
|from pytexera import *
|import plotly.express as px
|import plotly.io
@@ -114,14 +116,14 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
| return
|
| try:
- | table['$timeColumn'] = pd.to_datetime(table['$timeColumn'], errors='coerce')
- | table = table.dropna(subset=$dropnaStr).sort_values(by='$timeColumn')
+ | table[$timeColumn] = pd.to_datetime(table[$timeColumn], errors='coerce')
+ | table = table.dropna(subset=$dropnaStr).sort_values(by=$timeColumn)
|
| if table.empty:
| yield {'html-content': self.render_error("Table became empty after filtering.")}
| return
|
- | fig = $plotFunc(table, x='$timeColumn', y='$valueColumn'$colorArg$facetArg)
+ | fig = $plotFunc(table, x=$timeColumn, y=$valueColumn$colorArg$facetArg)
|
| if $showSlider:
| fig.update_xaxes(rangeslider_visible=True)
@@ -129,8 +131,8 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
| fig.update_layout(
| margin=dict(l=0, r=0, t=30, b=0),
| title=dict(text="Time Series Plot", x=0.5),
- | xaxis_title="$timeColumn",
- | yaxis_title="$valueColumn",
+ | xaxis_title=$timeColumn,
+ | yaxis_title=$valueColumn,
| template="plotly_white"
| )
|
@@ -139,6 +141,6 @@ class TimeSeriesOpDesc extends PythonOperatorDescriptor {
|
| except Exception as e:
| yield {'html-content': self.render_error(str(e))}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDesc.scala
index bf2e152c79d..33069a89212 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -37,7 +40,7 @@ class DotPlotOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("the attribute for the counting of the dot plot")
@AutofillAttributeName
@NotNull(message = "Count Attribute column cannot be empty")
- var countAttribute: String = ""
+ var countAttribute: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -57,21 +60,21 @@ class DotPlotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
- s"""
- | table = table.groupby(['$countAttribute'])['$countAttribute'].count().reset_index(name='counts')
- | fig = px.strip(table, x='counts', y='$countAttribute', orientation='h', color='$countAttribute',
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | table = table.groupby([$countAttribute])[$countAttribute].count().reset_index(name='counts')
+ | fig = px.strip(table, x='counts', y=$countAttribute, orientation='h', color=$countAttribute,
| color_discrete_sequence=px.colors.qualitative.Dark2)
|
| fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))
|
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -97,8 +100,8 @@ class DotPlotOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/IcicleChart/IcicleChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/IcicleChart/IcicleChartOpDesc.scala
index 7705194685a..a39e9b2681c 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/IcicleChart/IcicleChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/IcicleChart/IcicleChartOpDesc.scala
@@ -23,11 +23,14 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
import org.apache.texera.amber.operator.visualization.hierarchychart.HierarchySection
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.{NotEmpty, NotNull}
@@ -55,7 +58,7 @@ class IcicleChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("the value associated with the size of each sector in the chart")
@AutofillAttributeName
@NotNull(message = "Value column cannot be empty")
- var value: String = ""
+ var value: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -76,29 +79,29 @@ class IcicleChartOpDesc extends PythonOperatorDescriptor {
)
private def getIcicleAttributesInPython: String =
- hierarchy.map(_.attributeName).mkString("'", "','", "'")
+ hierarchy.map(c => pyb"${c.attributeName}").mkString(",")
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
val attributes = getIcicleAttributesInPython
- s"""
- | table['$value'] = table[table['$value'] > 0]['$value'] # remove non-positive numbers from the data
+ pyb"""
+ | table[$value] = table[table[$value] > 0][$value] # remove non-positive numbers from the data
| table.dropna(subset = [$attributes], inplace = True) #remove missing values
- |""".stripMargin
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(hierarchy.nonEmpty)
val attributes = getIcicleAttributesInPython
- s"""
- | fig = px.icicle(table, path=[$attributes], values='$value',
- | color='$value', hover_data=[$attributes],
+ pyb"""
+ | fig = px.icicle(table, path=[$attributes], values=$value,
+ | color=$value, hover_data=[$attributes],
| color_continuous_scale='RdBu')
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -128,8 +131,8 @@ class IcicleChartOpDesc extends PythonOperatorDescriptor {
| fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDesc.scala
index 2f1b9a5e970..dc9eec9a8ee 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDesc.scala
@@ -23,17 +23,20 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class ImageVisualizerOpDesc extends PythonOperatorDescriptor {
@JsonProperty(required = true)
@JsonSchemaTitle("image content column")
@JsonPropertyDescription("The Binary data of the Image")
@AutofillAttributeName
- var binaryContent: String = _
+ var binaryContent: EncodableString = _
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -53,16 +56,16 @@ class ImageVisualizerOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createBinaryData(): String = {
+ def createBinaryData(): PythonTemplateBuilder = {
assert(binaryContent.nonEmpty)
- s"""
- | binary_image_data = tuple_['$binaryContent']
- |""".stripMargin
+ pyb"""
+ | binary_image_data = tuple_[$binaryContent]
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|import base64
|from io import BytesIO
@@ -92,8 +95,8 @@ class ImageVisualizerOpDesc extends PythonOperatorDescriptor {
| def on_finish(self, port: int) -> Iterator[Optional[TupleLike]]:
| all_images_html = "" + "".join(self.images_html) + "
"
| yield {"html-content": all_images_html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ScatterMatrixChart/ScatterMatrixChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ScatterMatrixChart/ScatterMatrixChartOpDesc.scala
index bcb159a8119..3bfc5eb6b68 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ScatterMatrixChart/ScatterMatrixChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ScatterMatrixChart/ScatterMatrixChartOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.{
@@ -30,6 +32,7 @@ import org.apache.texera.amber.operator.metadata.annotations.{
AutofillAttributeNameList
}
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonSchemaInject(json = """
{
"attributeTypeRules": {
@@ -45,13 +48,13 @@ class ScatterMatrixChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Selected Attributes")
@JsonPropertyDescription("The axes of each scatter plot in the matrix.")
@AutofillAttributeNameList
- var selectedAttributes: List[String] = _
+ var selectedAttributes: List[EncodableString] = _
@JsonProperty(value = "Color", required = true)
@JsonSchemaTitle("Color Column")
@JsonPropertyDescription("Column to color points")
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -71,20 +74,20 @@ class ScatterMatrixChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(selectedAttributes.nonEmpty)
- val list_Attributes = selectedAttributes.map(attribute => s""""$attribute"""").mkString(",")
- s"""
- | fig = px.scatter_matrix(table, dimensions=[$list_Attributes], color='$color')
+ val list_Attributes = selectedAttributes.map(attribute => pyb"""$attribute""").mkString(",")
+ pyb"""
+ | fig = px.scatter_matrix(table, dimensions=[$list_Attributes], color=$color)
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -101,8 +104,8 @@ class ScatterMatrixChartOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDesc.scala
index c2a8740c30e..723b64bd3d0 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -47,20 +50,20 @@ class BarChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The value associated with each category")
@AutofillAttributeName
@NotNull(message = "Value column cannot be empty")
- var value: String = ""
+ var value: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Fields")
@JsonPropertyDescription("Visualize categorical data in a Bar Chart")
@AutofillAttributeName
@NotNull(message = "Fields cannot be empty")
- var fields: String = ""
+ var fields: EncodableString = ""
@JsonProperty(defaultValue = "No Selection", required = false)
@JsonSchemaTitle("Category Column")
@JsonPropertyDescription("Optional - Select a column to Color Code the Categories")
@AutofillAttributeName
- var categoryColumn: String = ""
+ var categoryColumn: EncodableString = ""
@JsonProperty(defaultValue = "false")
@JsonSchemaTitle("Horizontal Orientation")
@@ -71,7 +74,7 @@ class BarChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Pattern")
@JsonPropertyDescription("Add texture to the chart based on an attribute")
@AutofillAttributeName
- var pattern: String = ""
+ var pattern: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -91,12 +94,12 @@ class BarChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(value.nonEmpty, "Value column cannot be empty")
assert(fields.nonEmpty, "Fields cannot be empty")
- s"""
- | table = table.dropna(subset = ['$value', '$fields']) #remove missing values
- |""".stripMargin
+ pyb"""
+ | table = table.dropna(subset = [$value, $fields]) #remove missing values
+ |"""
}
override def generatePythonCode(): String = {
@@ -114,7 +117,7 @@ class BarChartOpDesc extends PythonOperatorDescriptor {
isCategoryColumn = "True"
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -136,22 +139,22 @@ class BarChartOpDesc extends PythonOperatorDescriptor {
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
| ${manipulateTable()}
- | if not table.empty and '$fields' != '$value':
+ | if not table.empty and $fields != $value:
| if $isHorizontalOrientation:
- | fig = go.Figure(px.bar(table, y='$fields', x='$value', color="$categoryColumn" if $isCategoryColumn else None, pattern_shape="$pattern" if $isPatternSelected else None, orientation = 'h'))
+ | fig = go.Figure(px.bar(table, y=$fields, x=$value, color=$categoryColumn if $isCategoryColumn else None, pattern_shape=$pattern if $isPatternSelected else None, orientation = 'h'))
| else:
- | fig = go.Figure(px.bar(table, y='$value', x='$fields', color="$categoryColumn" if $isCategoryColumn else None, pattern_shape="$pattern" if $isPatternSelected else None))
+ | fig = go.Figure(px.bar(table, y=$value, x=$fields, color=$categoryColumn if $isCategoryColumn else None, pattern_shape=$pattern if $isPatternSelected else None))
| fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
| html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
| # use latest plotly lib in html
| #html = html.replace('https://cdn.plot.ly/plotly-2.3.1.min.js', 'https://cdn.plot.ly/plotly-2.18.2.min.js')
- | elif '$fields' == '$value':
+ | elif $fields == $value:
| html = self.render_error('Fields should not have the same value.')
| elif table.empty:
| html = self.render_error('Table should not have any empty/null values or fields.')
| yield {'html-content':html}
- | """.stripMargin
- finalCode
+ | """
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/boxViolinPlot/BoxViolinPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/boxViolinPlot/BoxViolinPlotOpDesc.scala
index 5f57f17937c..9f3a2a1f31e 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/boxViolinPlot/BoxViolinPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/boxViolinPlot/BoxViolinPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription,
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonPropertyOrder(Array("value", "quartileType", "horizontalOrientation", "violinPlot"))
@JsonSchemaInject(json = """
@@ -44,7 +47,7 @@ class BoxViolinPlotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Value Column")
@JsonPropertyDescription("Data column for box plot")
@AutofillAttributeName
- var value: String = ""
+ var value: EncodableString = ""
@JsonProperty(
value = "Quartile Method",
@@ -83,38 +86,38 @@ class BoxViolinPlotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(value.nonEmpty)
- s"""
- | table = table.dropna(subset = ['$value']) #remove missing values
- |
- |""".stripMargin
+ pyb"""
+ | table = table.dropna(subset = [$value]) #remove missing values
+ |
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
val horizontal = if (horizontalOrientation) "True" else "False"
val violin = if (violinPlot) "True" else "False"
- s"""
+ pyb"""
| if($violin):
| if ($horizontal):
- | fig = px.violin(table, x='$value', box=True, points='all')
+ | fig = px.violin(table, x=$value, box=True, points='all')
| else:
- | fig = px.violin(table, y='$value', box=True, points='all')
+ | fig = px.violin(table, y=$value, box=True, points='all')
| else:
| if($horizontal):
- | fig = px.box(table, x='$value',boxmode="overlay", points='all')
+ | fig = px.box(table, x=$value,boxmode="overlay", points='all')
| else:
- | fig = px.box(table, y='$value',boxmode="overlay", points='all')
+ | fig = px.box(table, y=$value,boxmode="overlay", points='all')
| fig.update_traces(quartilemethod="${quartileType.getQuartiletype}", col=1)
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -146,8 +149,8 @@ class BoxViolinPlotOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- | """.stripMargin
- finalCode
+ | """
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDesc.scala
index 3b95d95158a..59a8cf5cc89 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -44,21 +47,21 @@ class BubbleChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("Data column for the x-axis")
@AutofillAttributeName
@NotNull(message = "xValue column cannot be empty")
- var xValue: String = ""
+ var xValue: EncodableString = ""
@JsonProperty(value = "yValue", required = true)
@JsonSchemaTitle("Y-Column")
@JsonPropertyDescription("Data column for the y-axis")
@AutofillAttributeName
@NotNull(message = "yValue column cannot be empty")
- var yValue: String = ""
+ var yValue: EncodableString = ""
@JsonProperty(value = "zValue", required = true)
@JsonSchemaTitle("Z-Column")
@JsonPropertyDescription("Data column to determine bubble size")
@AutofillAttributeName
@NotNull(message = "zValue column cannot be empty")
- var zValue: String = ""
+ var zValue: EncodableString = ""
@JsonProperty(value = "enableColor", defaultValue = "false")
@JsonSchemaTitle("Enable Color")
@@ -70,7 +73,7 @@ class BubbleChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("Picks data column to color bubbles with if color is enabled")
@AutofillAttributeName
@NotNull(message = "colorCategory column cannot be empty")
- var colorCategory: String = ""
+ var colorCategory: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -90,28 +93,28 @@ class BubbleChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(xValue.nonEmpty && yValue.nonEmpty && zValue.nonEmpty)
- s"""
+ pyb"""
| # drops rows with missing values pertaining to relevant columns
- | table.dropna(subset=['$xValue', '$yValue', '$zValue'], inplace = True)
+ | table.dropna(subset=[$xValue, $yValue, $zValue], inplace = True)
|
- |""".stripMargin
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(xValue.nonEmpty && yValue.nonEmpty && zValue.nonEmpty)
- s"""
- | if '$enableColor' == 'true':
- | fig = go.Figure(px.scatter(table, x='$xValue', y='$yValue', size='$zValue', size_max=100, color='$colorCategory'))
- | else:
- | fig = go.Figure(px.scatter(table, x='$xValue', y='$yValue', size='$zValue', size_max=100))
- |""".stripMargin
+ pyb"""
+ | if '$enableColor' == 'true':
+ | fig = go.Figure(px.scatter(table, x=$xValue, y=$yValue, size=$zValue, size_max=100, color=$colorCategory))
+ | else:
+ | fig = go.Figure(px.scatter(table, x=$xValue, y=$yValue, size=$zValue, size_max=100))
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -140,7 +143,7 @@ class BubbleChartOpDesc extends PythonOperatorDescriptor {
| fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
| html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
| yield {'html-content':html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDesc.scala
index 93bb11db4b2..a5e19bca9a5 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -40,17 +42,17 @@ class BulletChartOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "value", required = true)
@JsonSchemaTitle("Value")
@JsonPropertyDescription("The actual value to display on the bullet chart")
- @AutofillAttributeName var value: String = ""
+ @AutofillAttributeName var value: EncodableString = ""
@JsonProperty(value = "deltaReference", required = true)
@JsonSchemaTitle("Delta Reference")
@JsonPropertyDescription("The reference value for the delta indicator. e.g., 100")
- var deltaReference: String = ""
+ var deltaReference: EncodableString = ""
@JsonProperty(value = "thresholdValue", required = false)
@JsonSchemaTitle("Threshold Value")
@JsonPropertyDescription("The performance threshold value. e.g., 100")
- var thresholdValue: String = ""
+ var thresholdValue: EncodableString = ""
@JsonProperty(value = "steps", required = false)
@JsonSchemaTitle("Steps")
@@ -78,14 +80,14 @@ class BulletChartOpDesc extends PythonOperatorDescriptor {
// Convert the Scala list of steps into a list of dictionaries
val stepsStr = if (steps != null && !steps.isEmpty) {
val stepsSeq =
- steps.asScala.map(step => s"""{"start": "${step.start}", "end": "${step.end}"}""")
+ steps.asScala.map(step => pyb"""{"start": ${step.start}, "end": ${step.end}}""")
"[" + stepsSeq.mkString(", ") + "]"
} else {
"[]"
}
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|import plotly.graph_objects as go
|import plotly.io as pio
@@ -133,8 +135,8 @@ class BulletChartOpDesc extends PythonOperatorDescriptor {
| return
|
| try:
- | value_col = "$value"
- | delta_ref = float("$deltaReference") if "$deltaReference".strip() else 0
+ | value_col = $value
+ | delta_ref = float($deltaReference) if $deltaReference.strip() else 0
|
| if value_col not in table.columns:
| yield {'html-content': self.render_error(f"Column '{value_col}' not found in input table.")}
@@ -146,7 +148,7 @@ class BulletChartOpDesc extends PythonOperatorDescriptor {
| return
|
| try:
- | threshold_val = float("$thresholdValue") if "$thresholdValue".strip() else None
+ | threshold_val = float($thresholdValue) if $thresholdValue.strip() else None
| except ValueError:
| threshold_val = None
|
@@ -225,7 +227,7 @@ class BulletChartOpDesc extends PythonOperatorDescriptor {
| yield {"html-content": final_html}
| except Exception as e:
| yield {'html-content': self.render_error(f"General error: {str(e)}")}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartStepDefinition.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartStepDefinition.scala
index 0fdc9989928..5ff0ad89537 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartStepDefinition.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartStepDefinition.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.bulletChart
import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
/**
* Defines a step range used for qualitative segments in the Bullet Chart.
@@ -29,8 +30,8 @@ import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
class BulletChartStepDefinition @JsonCreator() (
@JsonProperty("start")
@JsonSchemaTitle("Start")
- var start: String,
+ var start: EncodableString,
@JsonProperty("end")
@JsonSchemaTitle("End")
- var end: String
+ var end: EncodableString
)
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/candlestickChart/CandlestickChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/candlestickChart/CandlestickChartOpDesc.scala
index 62156031213..f0cc02a9b7d 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/candlestickChart/CandlestickChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/candlestickChart/CandlestickChartOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -34,31 +36,31 @@ class CandlestickChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Date Column")
@JsonPropertyDescription("the date of the candlestick")
@AutofillAttributeName
- var date: String = ""
+ var date: EncodableString = ""
@JsonProperty(value = "open", required = true)
@JsonSchemaTitle("Opening Price Column")
@JsonPropertyDescription("the opening price of the candlestick")
@AutofillAttributeName
- var open: String = ""
+ var open: EncodableString = ""
@JsonProperty(value = "high", required = true)
@JsonSchemaTitle("Highest Price Column")
@JsonPropertyDescription("the highest price of the candlestick")
@AutofillAttributeName
- var high: String = ""
+ var high: EncodableString = ""
@JsonProperty(value = "low", required = true)
@JsonSchemaTitle("Lowest Price Column")
@JsonPropertyDescription("the lowest price of the candlestick")
@AutofillAttributeName
- var low: String = ""
+ var low: EncodableString = ""
@JsonProperty(value = "close", required = true)
@JsonSchemaTitle("Closing Price Column")
@JsonPropertyDescription("the closing price of the candlestick")
@AutofillAttributeName
- var close: String = ""
+ var close: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -79,7 +81,7 @@ class CandlestickChartOpDesc extends PythonOperatorDescriptor {
)
override def generatePythonCode(): String = {
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.graph_objects as go
@@ -96,16 +98,16 @@ class CandlestickChartOpDesc extends PythonOperatorDescriptor {
| df = pd.DataFrame(table_dict)
|
| fig = go.Figure(data=[go.Candlestick(
- | x=df['$date'],
- | open=df['$open'],
- | high=df['$high'],
- | low=df['$low'],
- | close=df['$close']
+ | x=df[$date],
+ | open=df[$open],
+ | high=df[$high],
+ | low=df[$low],
+ | close=df[$close]
| )])
| fig.update_layout(title='Candlestick Chart')
| html = fig.to_html(include_plotlyjs='cdn', full_html=False)
| yield {'html-content': html}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/choroplethMap/ChoroplethMapOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/choroplethMap/ChoroplethMapOpDesc.scala
index 629739dcd15..f9774ce80f4 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/choroplethMap/ChoroplethMapOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/choroplethMap/ChoroplethMapOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonSchemaInject(json = """
{
@@ -48,7 +51,7 @@ class ChoroplethMapOpDesc extends PythonOperatorDescriptor {
"Column used to describe location. Currently only supports countries and needs to be three-letter ISO country code"
)
@AutofillAttributeName
- var locations: String = ""
+ var locations: EncodableString = ""
@JsonProperty(value = "color", required = true)
@JsonSchemaTitle("Color Column")
@@ -56,7 +59,7 @@ class ChoroplethMapOpDesc extends PythonOperatorDescriptor {
"Column used to determine intensity of color of the region"
)
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -75,25 +78,25 @@ class ChoroplethMapOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(locations.nonEmpty)
assert(color.nonEmpty)
- s"""
- | table.dropna(subset=['$locations', '$color'], inplace = True)
- |""".stripMargin
+ pyb"""
+ | table.dropna(subset=[$locations, $color], inplace = True)
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(locations.nonEmpty && color.nonEmpty)
- s"""
- | fig = px.choropleth(table, locations="$locations", color="$color", color_continuous_scale=px.colors.sequential.Plasma)
- | fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
- |""".stripMargin
+ pyb"""
+ | fig = px.choropleth(table, locations=$locations, color=$color, color_continuous_scale=px.colors.sequential.Plasma)
+ | fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -120,7 +123,7 @@ class ChoroplethMapOpDesc extends PythonOperatorDescriptor {
| ${createPlotlyFigure()}
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/BandConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/BandConfig.scala
index c4ae7f7ae02..7bd602eeb18 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/BandConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/BandConfig.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.continuousErrorBands
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.visualization.lineChart.LineConfig
@@ -30,16 +31,16 @@ class BandConfig extends LineConfig {
@JsonSchemaTitle("Y-Axis Upper Bound")
@JsonPropertyDescription("Represents upper bound error of y-values")
@AutofillAttributeName
- var yUpper: String = ""
+ var yUpper: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Y-Axis Lower Bound")
@JsonPropertyDescription("Represents lower bound error of y-values")
@AutofillAttributeName
- var yLower: String = ""
+ var yLower: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Fill Color")
@JsonPropertyDescription("must be a valid CSS color or hex color string")
- var fillColor: String = ""
+ var fillColor: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/ContinuousErrorBandsOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/ContinuousErrorBandsOpDesc.scala
index dd343ca5f68..b21c882ed35 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/ContinuousErrorBandsOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/continuousErrorBands/ContinuousErrorBandsOpDesc.scala
@@ -23,9 +23,12 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import java.util
import scala.jdk.CollectionConverters.ListHasAsScala
@@ -34,12 +37,12 @@ class ContinuousErrorBandsOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "xLabel", required = false, defaultValue = "X Axis")
@JsonSchemaTitle("X Label")
@JsonPropertyDescription("Label used for x axis")
- var xLabel: String = ""
+ var xLabel: EncodableString = ""
@JsonProperty(value = "yLabel", required = false, defaultValue = "Y Axis")
@JsonSchemaTitle("Y Label")
@JsonPropertyDescription("Label used for y axis")
- var yLabel: String = ""
+ var yLabel: EncodableString = ""
@JsonProperty(value = "bands", required = true)
var bands: util.List[BandConfig] = _
@@ -62,30 +65,30 @@ class ContinuousErrorBandsOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
val bandsPart = bands.asScala
.map { bandConf =>
val colorPart = if (bandConf.color != "") {
- s"line={'color':'${bandConf.color}'}, marker={'color':'${bandConf.color}'}, "
+ pyb"line={'color':${bandConf.color}}, marker={'color':${bandConf.color}}, "
} else {
""
}
val fillColorPart = if (bandConf.fillColor != "") {
- s"fillcolor='${bandConf.fillColor}', "
+ pyb"fillcolor=${bandConf.fillColor}, "
} else {
""
}
val namePart = if (bandConf.name != "") {
- s"name='${bandConf.name}'"
+ pyb"name=${bandConf.name}"
} else {
- s"name='${bandConf.yValue}'"
+ pyb"name=${bandConf.yValue}"
}
- s"""fig.add_trace(go.Scatter(
- x=table['${bandConf.xValue}'],
- y=table['${bandConf.yUpper}'],
+ pyb"""fig.add_trace(go.Scatter(
+ x=table[${bandConf.xValue}],
+ y=table[${bandConf.yUpper}],
mode='lines',
marker=dict(color="#444"),
line=dict(width=0),
@@ -93,8 +96,8 @@ class ContinuousErrorBandsOpDesc extends PythonOperatorDescriptor {
$namePart
))
fig.add_trace(go.Scatter(
- x=table['${bandConf.xValue}'],
- y=table['${bandConf.yLower}'],
+ x=table[${bandConf.xValue}],
+ y=table[${bandConf.yLower}],
mode='lines',
marker=dict(color="#444"),
line=dict(width=0),
@@ -104,27 +107,27 @@ class ContinuousErrorBandsOpDesc extends PythonOperatorDescriptor {
$namePart
))
fig.add_trace(go.Scatter(
- x=table['${bandConf.xValue}'],
- y=table['${bandConf.yValue}'],
- mode='${bandConf.mode.getModeInPlotly}',
+ x=table[${bandConf.xValue}],
+ y=table[${bandConf.yValue}],
+ mode=${bandConf.mode.getModeInPlotly},
$colorPart
$namePart
))"""
}
- s"""
+ pyb"""
| fig = go.Figure()
| ${bandsPart.mkString("\n ")}
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0),
- | xaxis_title='$xLabel',
- | yaxis_title='$yLabel',
+ | xaxis_title=$xLabel,
+ | yaxis_title=$yLabel,
| hovermode="x")
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -148,7 +151,7 @@ class ContinuousErrorBandsOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/contourPlot/ContourPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/contourPlot/ContourPlotOpDesc.scala
index 3c77dd4e044..dd0f41b0faa 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/contourPlot/ContourPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/contourPlot/ContourPlotOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -34,24 +36,24 @@ class ContourPlotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("x")
@JsonPropertyDescription("The column name of X-axis")
@AutofillAttributeName
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(value = "y", required = true)
@JsonSchemaTitle("y")
@JsonPropertyDescription("The column name of Y-axis")
@AutofillAttributeName
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(value = "z", required = true)
@JsonSchemaTitle("z")
@JsonPropertyDescription("The column name of color bar")
@AutofillAttributeName
- var z: String = ""
+ var z: EncodableString = ""
@JsonProperty(required = false, defaultValue = "10")
@JsonSchemaTitle("Grid Size")
@JsonPropertyDescription("Grid resolution of the final image")
- var gridSize: String = ""
+ var gridSize: EncodableString = ""
@JsonProperty(required = false, defaultValue = "true")
@JsonSchemaTitle("Connect Gaps")
@@ -84,7 +86,7 @@ class ContourPlotOpDesc extends PythonOperatorDescriptor {
)
override def generatePythonCode(): String = {
- s"""from pytexera import *
+ pyb"""from pytexera import *
|import numpy as np
|import plotly.graph_objects as go
|from scipy.interpolate import griddata
@@ -94,11 +96,11 @@ class ContourPlotOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | x = table['$x'].values
- | y = table['$y'].values
- | z = table['$z'].values
- | grid_size = int('$gridSize')
- | connGaps = True if '$connectGaps' == 'true' else False
+ | x = table[$x].values
+ | y = table[$y].values
+ | z = table[$z].values
+ | grid_size = int($gridSize)
+ | connGaps = True if '$connectGaps' == 'true' else False
|
| grid_x, grid_y = np.meshgrid(np.linspace(min(x), max(x), grid_size), np.linspace(min(y), max(y), grid_size))
| grid_z = griddata((x, y), z, (grid_x, grid_y), method='cubic')
@@ -108,12 +110,12 @@ class ContourPlotOpDesc extends PythonOperatorDescriptor {
| y=np.linspace(min(y), max(y), grid_size),
| z=grid_z,
| connectgaps=connGaps,
- | contours_coloring ='${coloringMethod.getColoringMethod}',
- | colorbar_title='$z'
+ | contours_coloring =${coloringMethod.getColoringMethod},
+ | colorbar_title=$z
| ))
| fig.update_layout(title='Contour Plot')
| html = pio.to_html(fig, include_plotlyjs='cdn', full_html=False)
| yield {'html-content': html}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dendrogram/DendrogramOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dendrogram/DendrogramOpDesc.scala
index 4cb31e76746..d33ff2708f1 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dendrogram/DendrogramOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dendrogram/DendrogramOpDesc.scala
@@ -23,34 +23,37 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class DendrogramOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "xVal", required = true)
@JsonSchemaTitle("Value X Column")
@JsonPropertyDescription("The x values of points in dendrogram")
@AutofillAttributeName
- var xVal: String = ""
+ var xVal: EncodableString = ""
@JsonProperty(value = "yVal", required = true)
@JsonSchemaTitle("Value Y Column")
@JsonPropertyDescription("The y value of points in dendrogram")
@AutofillAttributeName
- var yVal: String = ""
+ var yVal: EncodableString = ""
@JsonProperty(value = "Labels", required = true)
@JsonSchemaTitle("Labels")
@JsonPropertyDescription("The label of points in dendrogram")
@AutofillAttributeName
- var labels: String = ""
+ var labels: EncodableString = ""
@JsonProperty(defaultValue = "", required = false)
@JsonSchemaTitle("Color Threshold")
@JsonPropertyDescription("Value at which separation of clusters will be made")
- var threshold: String = ""
+ var threshold: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -69,28 +72,28 @@ class DendrogramOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- private def createDendrogram(): String = {
+ private def createDendrogram(): PythonTemplateBuilder = {
assert(xVal.nonEmpty)
assert(yVal.nonEmpty)
assert(labels.nonEmpty)
- val strippedThreshold = threshold.trim
+ val strippedThreshold: EncodableString = threshold.trim
val isThreshold =
- if (strippedThreshold.nonEmpty) s"color_threshold=$strippedThreshold"
+ if (strippedThreshold.nonEmpty) pyb"color_threshold=float($strippedThreshold)" // encoded values reach Python as str; cast so the threshold stays numeric
else "color_threshold=None"
- s"""
- | x = np.array(table["$xVal"])
- | y = np.array(table["$yVal"])
+ pyb"""
+ | x = np.array(table[$xVal])
+ | y = np.array(table[$yVal])
| data = np.column_stack((x, y))
- | labels = table["$labels"].tolist()
+ | labels = table[$labels].tolist()
|
| fig = ff.create_dendrogram(data, labels=labels, $isThreshold)
| fig.update_layout(yaxis_title="Linkage Distance", margin=dict(l=0, r=0, b=0, t=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -115,7 +118,7 @@ class DendrogramOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellDotConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellDotConfig.scala
index 13a803debbf..65f56e6fdcd 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellDotConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellDotConfig.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.dumbbellPlot
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import javax.validation.constraints.NotNull
@@ -41,6 +42,6 @@ class DumbbellDotConfig {
@JsonPropertyDescription("value for dot axis")
@AutofillAttributeName
@NotNull(message = "Dot Column Value cannot be empty")
- var dotValue: String = ""
+ var dotValue: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellPlotOpDesc.scala
index ac49336c0a9..88b6caae614 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/dumbbellPlot/DumbbellPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import java.util
import javax.validation.constraints.{NotBlank, NotNull}
@@ -48,33 +51,33 @@ class DumbbellPlotOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("the name of the category column")
@AutofillAttributeName
@NotNull(message = "Category Column Name cannot be empty")
- var categoryColumnName: String = ""
+ var categoryColumnName: EncodableString = ""
@JsonProperty(value = "dumbbellStartValue", required = true)
@JsonSchemaTitle("Dumbbell Start Value")
@JsonPropertyDescription("the start point value of each dumbbell")
@NotBlank(message = "Dumbbell Start Value cannot be empty")
- var dumbbellStartValue: String = ""
+ var dumbbellStartValue: EncodableString = ""
@JsonProperty(value = "dumbbellEndValue", required = true)
@JsonSchemaTitle("Dumbbell End Value")
@JsonPropertyDescription("the end value of each dumbbell")
@NotBlank(message = "Dumbbell End Value cannot be empty")
- var dumbbellEndValue: String = ""
+ var dumbbellEndValue: EncodableString = ""
@JsonProperty(value = "measurementColumnName", required = true)
@JsonSchemaTitle("Measurement Column Name")
@JsonPropertyDescription("the name of the measurement column")
@AutofillAttributeName
@NotNull(message = "Measurement Column Name cannot be empty")
- var measurementColumnName: String = ""
+ var measurementColumnName: EncodableString = ""
@JsonProperty(value = "comparedColumnName", required = true)
@JsonSchemaTitle("Compared Column Name")
@JsonPropertyDescription("the column name that is being compared")
@AutofillAttributeName
@NotNull(message = "Compared Column Name cannot be empty")
- var comparedColumnName: String = ""
+ var comparedColumnName: EncodableString = ""
@JsonProperty(value = "dots", required = false)
var dots: util.List[DumbbellDotConfig] = _
@@ -102,57 +105,57 @@ class DumbbellPlotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyDumbbellLineFigure(): String = {
- val dumbbellValues = dumbbellStartValue + ", " + dumbbellEndValue
+ def createPlotlyDumbbellLineFigure(): PythonTemplateBuilder = {
+ val dumbbellValues = pyb"$dumbbellStartValue, $dumbbellEndValue"
var showLegendsOption = "showlegend=False"
if (showLegends) {
showLegendsOption = "showlegend=True"
}
- s"""
+ pyb"""
|
- | entityNames = list(table['${comparedColumnName}'].unique())
+ | entityNames = list(table[${comparedColumnName}].unique())
| entityNames = sorted(entityNames, reverse=True)
| categoryValues = [${dumbbellValues}]
- | filtered_table = table[(table['${comparedColumnName}'].isin(entityNames)) &
- | (table['${categoryColumnName}'].isin(categoryValues))]
+ | filtered_table = table[(table[${comparedColumnName}].isin(entityNames)) &
+ | (table[${categoryColumnName}].isin(categoryValues))]
|
| # Create the dumbbell line using Plotly
| fig = go.Figure()
| color = 'black'
| for entity in entityNames:
- | entity_data = filtered_table[filtered_table['${comparedColumnName}'] == entity]
- | fig.add_trace(go.Scatter(x=entity_data['${measurementColumnName}'],
+ | entity_data = filtered_table[filtered_table[${comparedColumnName}] == entity]
+ | fig.add_trace(go.Scatter(x=entity_data[${measurementColumnName}],
| y=[entity]*len(entity_data),
| mode='lines',
| name=entity,
| line=dict(color=color)))
|
- | fig.update_layout(xaxis_title="${measurementColumnName}",
- | yaxis_title="${comparedColumnName}",
+ | fig.update_layout(xaxis_title=${measurementColumnName},
+ | yaxis_title=${comparedColumnName},
| yaxis=dict(categoryorder='array', categoryarray=entityNames),
| ${showLegendsOption}
| )
- |""".stripMargin
+ |"""
}
- def addPlotlyDots(): String = {
+ def addPlotlyDots(): PythonTemplateBuilder = {
var dotColumnNames = ""
if (dots != null && dots.size() != 0) {
dotColumnNames = dots.asScala
.map { dot =>
- s"'${dot.dotValue}'"
+ pyb"${dot.dotValue}"
}
.mkString(",")
}
- s"""
+ pyb"""
| dotColumnNames = [${dotColumnNames}]
| if len(dotColumnNames) > 0:
| for dotColumn in dotColumnNames:
| # Extract dot data for each entity
| for entity in entityNames:
- | entity_dot_data = filtered_table[filtered_table['${comparedColumnName}'] == entity]
+ | entity_dot_data = filtered_table[filtered_table[${comparedColumnName}] == entity]
| # Extract X and Y values for the dot
| x_values = entity_dot_data[dotColumn].values
| y_values = [entity] * len(x_values)
@@ -161,11 +164,11 @@ class DumbbellPlotOpDesc extends PythonOperatorDescriptor {
| mode='markers',
| name=entity + ' ' + dotColumn,
| marker=dict(color='black', size=5))) # Customize color and size as needed
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -191,6 +194,6 @@ class DumbbellPlotOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableConfig.scala
index f8d5f0dd354..726f3d78ea5 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableConfig.scala
@@ -21,11 +21,12 @@ package org.apache.texera.amber.operator.visualization.figureFactoryTable
import com.fasterxml.jackson.annotation.JsonProperty
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
class FigureFactoryTableConfig {
@JsonProperty(required = true)
@JsonSchemaTitle("Attribute Name")
@AutofillAttributeName
- var attributeName: String = ""
+ var attributeName: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableOpDesc.scala
index 15d79e327da..11168488b74 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/figureFactoryTable/FigureFactoryTableOpDesc.scala
@@ -23,67 +23,70 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class FigureFactoryTableOpDesc extends PythonOperatorDescriptor {
@JsonProperty(required = false)
@JsonSchemaTitle("Font Size")
@JsonPropertyDescription("Font size of the Figure Factory Table")
- var fontSize: String = "12"
+ var fontSize: Double = 12
@JsonProperty(required = false)
@JsonSchemaTitle("Font Color (Hex Code)")
@JsonPropertyDescription("Font color of the Figure Factory Table")
- var fontColor: String = "#000000"
+ var fontColor: EncodableString = "#000000"
@JsonProperty(required = false)
@JsonSchemaTitle("Row Height")
@JsonPropertyDescription("Row height of the Figure Factory Table")
- var rowHeight: String = "30"
+ var rowHeight: Double = 30
@JsonPropertyDescription("List of columns to include in the figure factory table")
@JsonProperty(value = "add attribute", required = true)
var columns: List[FigureFactoryTableConfig] = List()
private def getAttributes: String =
- columns.map(_.attributeName).mkString("'", "','", "'")
+ columns.map(c => pyb"""${c.attributeName}""").mkString(",")
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(columns.nonEmpty)
val attributes = getAttributes
- s"""
- | # drops rows with missing values pertaining to relevant columns
- | table = table.dropna(subset=[$attributes])
- |
- |""".stripMargin
+ pyb"""
+ | # drops rows with missing values pertaining to relevant columns
+ | table = table.dropna(subset=[$attributes])
+ |
+ |"""
}
- def createFigureFactoryTablePlotlyFigure(): String = {
+ def createFigureFactoryTablePlotlyFigure(): PythonTemplateBuilder = {
assert(columns.nonEmpty)
- val intFontSize: Option[Double] = fontSize.toDoubleOption
- val intRowHeight: Option[Double] = rowHeight.toDoubleOption
+ val intFontSize: Option[Double] = Option(fontSize)
+ val intRowHeight: Option[Double] = Option(rowHeight)
assert(intFontSize.isDefined && intFontSize.get >= 0)
assert(intRowHeight.isDefined && intRowHeight.get >= 30)
val attributes = getAttributes
- s"""
- | filtered_table = table[[$attributes]]
- | headers = filtered_table.columns.tolist()
- | cell_values = [filtered_table[col].tolist() for col in headers]
- |
- | data = [headers] + list(map(list, zip(*cell_values)))
- | fig = ff.create_table(data, height_constant = ${intRowHeight.get}, font_colors=['$fontColor'])
- |
- | # Make text size larger
- | for i in range(len(fig.layout.annotations)):
- | fig.layout.annotations[i].font.size = ${intFontSize.get}
- |
- |""".stripMargin
+ pyb"""
+ | filtered_table = table[[$attributes]]
+ | headers = filtered_table.columns.tolist()
+ | cell_values = [filtered_table[col].tolist() for col in headers]
+ |
+ | data = [headers] + list(map(list, zip(*cell_values)))
+ | fig = ff.create_table(data, height_constant = ${intRowHeight.get}, font_colors=[$fontColor])
+ |
+ | # Make text size larger
+ | for i in range(len(fig.layout.annotations)):
+ | fig.layout.annotations[i].font.size = ${intFontSize.get}
+ |
+ |"""
}
override def generatePythonCode(): String = {
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/filledAreaPlot/FilledAreaPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/filledAreaPlot/FilledAreaPlotOpDesc.scala
index f8e54a93f0e..d8d47696157 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/filledAreaPlot/FilledAreaPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/filledAreaPlot/FilledAreaPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -37,26 +40,26 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The attribute for your x-axis")
@AutofillAttributeName
@NotNull(message = "X-axis Attribute cannot be empty")
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Y-axis Attribute")
@JsonPropertyDescription("The attribute for your y-axis")
@AutofillAttributeName
@NotNull(message = "Y-axis Attribute cannot be empty")
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Line Group")
@JsonPropertyDescription("The attribute for group of each line")
@AutofillAttributeName
- var lineGroup: String = ""
+ var lineGroup: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Color")
@JsonPropertyDescription("Choose an attribute to color the plot")
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Split Plot by Line Group")
@@ -67,7 +70,7 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Pattern")
@JsonPropertyDescription("Add texture to the chart based on an attribute")
@AutofillAttributeName
- var pattern: String = ""
+ var pattern: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -87,7 +90,7 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(x.nonEmpty)
assert(y.nonEmpty)
@@ -95,24 +98,24 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
assert(lineGroup.nonEmpty)
}
- val colorArg = if (color.nonEmpty) s""", color="$color"""" else ""
- val facetColumnArg = if (facetColumn) s""", facet_col="$lineGroup"""" else ""
- val lineGroupArg = if (lineGroup.nonEmpty) s""", line_group="$lineGroup"""" else ""
- val patternParam = if (pattern.nonEmpty) s""", pattern_shape="$pattern"""" else ""
+ val colorArg = if (color.nonEmpty) pyb""", color=$color""" else ""
+ val facetColumnArg = if (facetColumn) pyb""", facet_col=$lineGroup""" else ""
+ val lineGroupArg = if (lineGroup.nonEmpty) pyb""", line_group=$lineGroup""" else ""
+ val patternParam = if (pattern.nonEmpty) pyb""", pattern_shape=$pattern""" else ""
- s"""
- | fig = px.area(table, x="$x", y="$y"$colorArg$facetColumnArg$lineGroupArg$patternParam)
- |""".stripMargin
+ pyb"""
+ | fig = px.area(table, x=$x, y=$y$colorArg$facetColumnArg$lineGroupArg$patternParam)
+ |"""
}
// The function below checks whether there are more than 5 percents of the groups have disjoint sets of x attributes.
- def performTableCheck(): String = {
- s"""
+ def performTableCheck(): PythonTemplateBuilder = {
+ pyb"""
| error = ""
- | if "$x" not in columns or "$y" not in columns:
+ | if $x not in columns or $y not in columns:
| error = "missing attributes"
- | elif "$lineGroup" != "":
- | grouped = table.groupby("$lineGroup")
+ | elif $lineGroup != "":
+ | grouped = table.groupby($lineGroup)
| x_values = None
|
| tolerance = (len(grouped) // 100) * 5
@@ -120,19 +123,19 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
|
| for _, group in grouped:
| if x_values == None:
- | x_values = set(group["$x"].unique())
- | elif set(group["$x"].unique()).intersection(x_values):
- | X_values = x_values.union(set(group["$x"].unique()))
- | elif not set(group["$x"].unique()).intersection(x_values):
+ | x_values = set(group[$x].unique())
+ | elif set(group[$x].unique()).intersection(x_values):
+ | x_values = x_values.union(set(group[$x].unique()))
+ | elif not set(group[$x].unique()).intersection(x_values):
| count += 1
| if count > tolerance:
| error = "X attributes not shared across groups"
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly
@@ -167,8 +170,8 @@ class FilledAreaPlotOpDesc extends PythonOperatorDescriptor {
| '''
|
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDesc.scala
index 61ff4ddd393..89c1cb0b104 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonSchemaInject(json = """
{
"attributeTypeRules": {
@@ -40,19 +43,19 @@ class FunnelPlotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("X Column")
@JsonPropertyDescription("Data column for the x-axis")
@AutofillAttributeName
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Y Column")
@JsonPropertyDescription("Data column for the y-axis")
@AutofillAttributeName
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Color Column")
@JsonPropertyDescription("Column to categorically colorize funnel sections")
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -72,25 +75,25 @@ class FunnelPlotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- private def createPlotlyFigure(): String = {
+ private def createPlotlyFigure(): PythonTemplateBuilder = {
assert(x.nonEmpty)
assert(y.nonEmpty)
- val colorArg = if (color.nonEmpty) s""", color="$color"""" else ""
- s"""
- | fig = go.Figure(px.funnel(table, x ="$x", y = "$y"$colorArg))
- | fig.update_layout(
- | scene=dict(
- | xaxis_title='X: $x',
- | yaxis_title='Y: $y',
- | ),
- | margin=dict(t=0, b=0, l=0, r=0)
- | )
- |""".stripMargin
+ val colorArg = if (color.nonEmpty) pyb""", color=$color""" else ""
+ pyb"""
+ | fig = go.Figure(px.funnel(table, x =$x, y = $y$colorArg))
+ | fig.update_layout(
+ | scene=dict(
+ | xaxis_title='X: ' + $x,
+ | yaxis_title='Y: ' + $y,
+ | ),
+ | margin=dict(t=0, b=0, l=0, r=0)
+ | )
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -115,8 +118,7 @@ class FunnelPlotOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
-
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDesc.scala
index 3be1ae9fa55..9a1f32e43ef 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -49,33 +52,33 @@ class GanttChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("the start timestamp of the task")
@AutofillAttributeName
@NotNull(message = "Start Datetime Column cannot be empty")
- var start: String = ""
+ var start: EncodableString = ""
@JsonProperty(value = "finish", required = true)
@JsonSchemaTitle("Finish Datetime Column")
@JsonPropertyDescription("the end timestamp of the task")
@AutofillAttributeName
@NotNull(message = "Finish Datetime Column cannot be empty")
- var finish: String = ""
+ var finish: EncodableString = ""
@JsonProperty(value = "task", required = true)
@JsonSchemaTitle("Task Column")
@JsonPropertyDescription("the name of the task")
@AutofillAttributeName
@NotNull(message = "Task Column cannot be empty")
- var task: String = ""
+ var task: EncodableString = ""
@JsonProperty(value = "color", required = false)
@JsonSchemaTitle("Color Column")
@JsonPropertyDescription("column to color tasks")
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Pattern")
@JsonPropertyDescription("Add texture to the chart based on an attribute")
@AutofillAttributeName
- var pattern: String = ""
+ var pattern: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -95,28 +98,28 @@ class GanttChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
- val optionalFilterTable = if (color.nonEmpty) s"&(table['$color'].notnull())" else ""
- s"""
- | table = table[(table["$start"].notnull())&(table["$finish"].notnull())&(table["$finish"].notnull())$optionalFilterTable].copy()
- |""".stripMargin
+ def manipulateTable(): PythonTemplateBuilder = { // keeps rows whose start, finish and task (and color, when set) are non-null
+ val optionalFilterTable = if (color.nonEmpty) pyb"&(table[$color].notnull())" else pyb""
+ pyb"""
+ | table = table[(table[$start].notnull())&(table[$finish].notnull())&(table[$task].notnull())$optionalFilterTable].copy()
+ |"""
}
- def createPlotlyFigure(): String = {
- val colorSetting = if (color.nonEmpty) s", color='$color'" else ""
- val patternParam = if (pattern.nonEmpty) s", pattern_shape='$pattern'" else ""
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ val colorSetting = if (color.nonEmpty) pyb", color=$color" else pyb""
+ val patternParam = if (pattern.nonEmpty) pyb", pattern_shape=$pattern" else pyb""
- s"""
- | fig = px.timeline(table, x_start='$start', x_end='$finish', y='$task' $colorSetting $patternParam)
+ pyb"""
+ | fig = px.timeline(table, x_start=$start, x_end=$finish, y=$task $colorSetting $patternParam)
| fig.update_yaxes(autorange='reversed')
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -143,7 +146,7 @@ class GanttChartOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartOpDesc.scala
index fdc48ccc2a3..c890786eeff 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartOpDesc.scala
@@ -24,6 +24,8 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -34,17 +36,17 @@ class GaugeChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Gauge Value")
@JsonPropertyDescription("The primary value displayed on the gauge chart")
@AutofillAttributeName
- var value: String = ""
+ var value: EncodableString = ""
@JsonProperty(value = "delta", required = false)
@JsonSchemaTitle("Delta")
@JsonPropertyDescription("The baseline value used to calculate the delta from the gauge value")
- var delta: String = ""
+ var delta: EncodableString = ""
@JsonProperty(value = "threshold", required = false)
@JsonSchemaTitle("Threshold Value")
@JsonPropertyDescription("Defines a boundary or target value shown on the gauge chart")
- var threshold: String = ""
+ var threshold: EncodableString = ""
@JsonProperty(value = "steps", required = false)
@JsonSchemaTitle("Steps")
@@ -75,9 +77,9 @@ class GaugeChartOpDesc extends PythonOperatorDescriptor {
}
override def generatePythonCode(): String = {
- val stepsStr: String = serializeSteps(steps)
+ val stepsStr: EncodableString = serializeSteps(steps)
- s"""
+ pyb"""
|from pytexera import *
|import plotly.graph_objects as go
|import plotly.io as pio
@@ -103,13 +105,13 @@ class GaugeChartOpDesc extends PythonOperatorDescriptor {
| return
|
| try:
- | gauge_value = "$value"
+ | gauge_value = $value
| try:
- | delta_ref = float("$delta") if "$delta".strip() else None
+ | delta_ref = float($delta) if $delta.strip() else None
| except ValueError:
| delta_ref = None
| try:
- | threshold_val = float("$threshold") if "$threshold".strip() else None
+ | threshold_val = float($threshold) if $threshold.strip() else None
| except ValueError:
| threshold_val = None
|
@@ -119,7 +121,7 @@ class GaugeChartOpDesc extends PythonOperatorDescriptor {
| return
|
| try:
- | valid_steps = json.loads('''$stepsStr''')
+ | valid_steps = json.loads($stepsStr)
| step_colors = self.generate_gray_gradient(len(valid_steps))
| steps_list = []
| for index, step_data in enumerate(valid_steps):
@@ -184,6 +186,6 @@ class GaugeChartOpDesc extends PythonOperatorDescriptor {
|
| except Exception as e:
| yield {'html-content': self.render_error(f"General error: {str(e)}")}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartSteps.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartSteps.scala
index 78f407af37e..4c6235a9ad9 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartSteps.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/gaugeChart/GaugeChartSteps.scala
@@ -20,13 +20,14 @@ package org.apache.texera.amber.operator.visualization.gaugeChart
import com.fasterxml.jackson.annotation.JsonProperty
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
class GaugeChartSteps {
@JsonProperty("start")
@JsonSchemaTitle("Start")
- var start: String = ""
+ var start: EncodableString = ""
@JsonProperty("end")
@JsonSchemaTitle("End")
- var end: String = ""
+ var end: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/heatMap/HeatMapOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/heatMap/HeatMapOpDesc.scala
index f66730090e7..d38dfdf4c90 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/heatMap/HeatMapOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/heatMap/HeatMapOpDesc.scala
@@ -23,29 +23,32 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class HeatMapOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "x", required = true)
@JsonSchemaTitle("Value X Column")
@JsonPropertyDescription("the values along the x-axis")
@AutofillAttributeName
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(value = "y", required = true)
@JsonSchemaTitle("Value Y Column")
@JsonPropertyDescription("the values along the y-axis")
@AutofillAttributeName
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(value = "Values", required = true)
@JsonSchemaTitle("Values")
@JsonPropertyDescription("the values of the heatmap")
@AutofillAttributeName
- var value: String = ""
+ var value: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -65,20 +68,20 @@ class HeatMapOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- private def createHeatMap(): String = {
+ private def createHeatMap(): PythonTemplateBuilder = {
assert(x.nonEmpty)
assert(y.nonEmpty)
assert(value.nonEmpty)
- s"""
- | heatmap = go.Heatmap(z=table["$value"],x=table["$x"],y=table["$y"])
+ pyb"""
+ | heatmap = go.Heatmap(z=table[$value],x=table[$x],y=table[$y])
| layout = go.Layout(margin=dict(l=0, r=0, b=0, t=0))
| fig = go.Figure(data=[heatmap], layout=layout)
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -103,8 +106,8 @@ class HeatMapOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDesc.scala
index 5b42935307c..d46549111c2 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.{NotEmpty, NotNull}
@@ -60,7 +63,7 @@ class HierarchyChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The value associated with the size of each sector in the chart")
@AutofillAttributeName
@NotNull(message = "Value column cannot be empty")
- var value: String = ""
+ var value: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -81,30 +84,30 @@ class HierarchyChartOpDesc extends PythonOperatorDescriptor {
)
private def getHierarchyAttributesInPython: String =
- hierarchy.map(_.attributeName).mkString("'", "','", "'")
+ hierarchy.map(c => pyb"${c.attributeName}").mkString(",")
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(value.nonEmpty)
val attributes = getHierarchyAttributesInPython
- s"""
- | table['$value'] = table[table['$value'] > 0]['$value'] # remove non-positive numbers from the data
+ pyb"""
+ | table[$value] = table[table[$value] > 0][$value] # remove non-positive numbers from the data
| table.dropna(subset = [$attributes], inplace = True) #remove missing values
- |""".stripMargin
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(hierarchy.nonEmpty)
val attributes = getHierarchyAttributesInPython
- s"""
- | fig = px.${hierarchyChartType.getPlotlyExpressApiName}(table, path=[$attributes], values='$value',
- | color='$value', hover_data=[$attributes],
+ pyb"""
+ | fig = px.${hierarchyChartType.getPlotlyExpressApiName}(table, path=[$attributes], values=$value,
+ | color=$value, hover_data=[$attributes],
| color_continuous_scale='RdBu')
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -134,8 +137,8 @@ class HierarchyChartOpDesc extends PythonOperatorDescriptor {
| fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchySection.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchySection.scala
index 9d8946f7b1d..55128f5ee2a 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchySection.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchySection.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.hierarchychart
import com.fasterxml.jackson.annotation.JsonProperty
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import javax.validation.constraints.NotNull
@@ -32,5 +33,5 @@ class HierarchySection {
@JsonSchemaTitle("Attribute Name")
@AutofillAttributeName
@NotNull(message = "Attribute Name cannot be empty")
- var attributeName: String = ""
+ var attributeName: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram/HistogramChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram/HistogramChartOpDesc.scala
index 8e3ac2c3793..0c1a29b781b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram/HistogramChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram/HistogramChartOpDesc.scala
@@ -23,39 +23,42 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class HistogramChartOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "value", required = true)
@JsonSchemaTitle("Value Column")
@JsonPropertyDescription("Column for counting values.")
@AutofillAttributeName
- var value: String = ""
+ var value: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Color Column")
@JsonPropertyDescription("Column for differentiating data by its value.")
@AutofillAttributeName
- var color: String = ""
+ var color: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("SeparateBy Column")
@JsonPropertyDescription("Column for separating histogram chart by its value.")
@AutofillAttributeName
- var separateBy: String = ""
+ var separateBy: EncodableString = ""
@JsonProperty(required = false, defaultValue = "")
@JsonSchemaTitle("Distribution Type")
@JsonPropertyDescription("Distribution type (rug, box, violin).")
- var marginal: String = ""
+ var marginal: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Pattern")
@JsonPropertyDescription("Add texture to the chart based on an attribute")
@AutofillAttributeName
- var pattern: String = ""
+ var pattern: EncodableString = ""
override def operatorInfo: OperatorInfo =
OperatorInfo(
@@ -66,26 +69,26 @@ class HistogramChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(value.nonEmpty)
- var colorParam = ""
- var categoryParam = ""
- var marginalParam = ""
- var patternParam = ""
- if (color.nonEmpty) colorParam = s", color = '$color'"
- if (separateBy.nonEmpty) categoryParam = s", facet_col = '$separateBy'"
- if (marginal.nonEmpty) marginalParam = s", marginal='$marginal'"
- if (pattern != "") patternParam = s", pattern_shape='$pattern'"
+ var colorParam = pyb""
+ var categoryParam = pyb""
+ var marginalParam = pyb""
+ var patternParam = pyb""
+ if (color.nonEmpty) colorParam = pyb", color = $color"
+ if (separateBy.nonEmpty) categoryParam = pyb", facet_col = $separateBy"
+ if (marginal.nonEmpty) marginalParam = pyb", marginal=$marginal"
+ if (pattern != "") patternParam = pyb", pattern_shape=$pattern"
- s"""
- | fig = px.histogram(table, x = '$value', text_auto = True $colorParam $categoryParam $marginalParam $patternParam)
- | fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
- |""".stripMargin
+ pyb"""
+ | fig = px.histogram(table, x = $value, text_auto = True $colorParam $categoryParam $marginalParam $patternParam)
+ | fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -109,8 +112,8 @@ class HistogramChartOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
override def getOutputSchemas(
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram2d/Histogram2DOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram2d/Histogram2DOpDesc.scala
index 1068b5e8fa8..88167a8353f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram2d/Histogram2DOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/histogram2d/Histogram2DOpDesc.scala
@@ -22,6 +22,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -33,23 +35,23 @@ class Histogram2DOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("X Column")
@JsonPropertyDescription("Numeric column for the X axis bins.")
@AutofillAttributeName
- var xColumn = ""
+ var xColumn: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Y Column")
@JsonPropertyDescription("Numeric column for the Y axis bins.")
@AutofillAttributeName
- var yColumn = ""
+ var yColumn: EncodableString = ""
@JsonProperty(required = true, defaultValue = "10")
@JsonSchemaTitle("X Bins")
@JsonPropertyDescription("Number of bins along the X axis (Default: 10)")
- var xBins: Int = _
+ var xBins: Int = 10
@JsonProperty(required = true, defaultValue = "10")
@JsonSchemaTitle("Y Bins")
@JsonPropertyDescription("Number of bins along the Y axis (Default: 10)")
- var yBins: Int = _
+ var yBins: Int = 10
@JsonProperty(required = false, defaultValue = "density")
@JsonSchemaTitle("Normalization")
@@ -79,9 +81,9 @@ class Histogram2DOpDesc extends PythonOperatorDescriptor {
assert(yBins > 0, s"Y Bins must be > 0, but got $yBins")
val normArg =
- s"histnorm='${normalize.getValue}',"
+ pyb"histnorm=${normalize.getValue},"
- s"""
+ pyb"""
|from pytexera import *
|import plotly.express as px
|import plotly.io
@@ -98,23 +100,23 @@ class Histogram2DOpDesc extends PythonOperatorDescriptor {
| return
|
| # Drop rows with missing x/y
- | table.dropna(subset=['${xColumn}', '${yColumn}'], inplace=True)
+ | table.dropna(subset=[$xColumn, $yColumn], inplace=True)
| if table.empty:
| yield {"html-content": self.render_error("No rows after dropping nulls.")}
| return
|
| fig = px.density_heatmap(
| table,
- | x='${xColumn}',
- | y='${yColumn}',
- | nbinsx=${xBins},
- | nbinsy=${yBins},
- | ${normArg}
+ | x=$xColumn,
+ | y=$yColumn,
+ | nbinsx=$xBins,
+ | nbinsy=$yBins,
+ | $normArg
| text_auto=True
| )
|
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {"html-content": html}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineChartOpDesc.scala
index 90613e855e4..2400b53e11b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineChartOpDesc.scala
@@ -23,9 +23,12 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import java.util
import scala.jdk.CollectionConverters.ListHasAsScala
@@ -35,12 +38,12 @@ class LineChartOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "yLabel", required = false, defaultValue = "Y Axis")
@JsonSchemaTitle("Y Label")
@JsonPropertyDescription("the label for y axis")
- var yLabel: String = ""
+ var yLabel: EncodableString = ""
@JsonProperty(value = "xLabel", required = false, defaultValue = "X Axis")
@JsonSchemaTitle("X Label")
@JsonPropertyDescription("the label for x axis")
- var xLabel: String = ""
+ var xLabel: EncodableString = ""
@JsonProperty(value = "lines", required = true)
var lines: util.List[LineConfig] = _
@@ -63,42 +66,42 @@ class LineChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
val linesPart = lines.asScala
.map { lineConf =>
val colorPart = if (lineConf.color != "") {
- s"line={'color':'${lineConf.color}'}, marker={'color':'${lineConf.color}'}, "
+ pyb"line={'color':${lineConf.color}}, marker={'color':${lineConf.color}}, "
} else {
- ""
+ pyb""
}
val namePart = if (lineConf.name != "") {
- s"name='${lineConf.name}'"
+ pyb"name=${lineConf.name}"
} else {
- s"name='${lineConf.yValue}'"
+ pyb"name=${lineConf.yValue}"
}
- s"""fig.add_trace(go.Scatter(
- x=table['${lineConf.xValue}'],
- y=table['${lineConf.yValue}'],
+ pyb"""fig.add_trace(go.Scatter(
+ x=table[${lineConf.xValue}],
+ y=table[${lineConf.yValue}],
mode='${lineConf.mode.getModeInPlotly}',
$colorPart
$namePart
))"""
}
- s"""
+ pyb"""
| fig = go.Figure()
| ${linesPart.mkString("\n ")}
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0),
- | xaxis_title='$xLabel',
- | yaxis_title='$yLabel')
- |""".stripMargin
+ | xaxis_title=$xLabel,
+ | yaxis_title=$yLabel)
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -123,8 +126,8 @@ class LineChartOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineConfig.scala
index 46eb19004d2..1a6378be737 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/lineChart/LineConfig.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.lineChart
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import javax.validation.constraints.NotNull
@@ -45,14 +46,14 @@ class LineConfig {
@JsonPropertyDescription("value for y axis")
@AutofillAttributeName
@NotNull(message = "Y Value cannot be empty")
- var yValue: String = ""
+ var yValue: EncodableString = ""
@JsonProperty(value = "x", required = true)
@JsonSchemaTitle("X Value")
@JsonPropertyDescription("value for x axis")
@AutofillAttributeName
@NotNull(message = "X Value cannot be empty")
- var xValue: String = ""
+ var xValue: EncodableString = ""
@JsonProperty(
value = "mode",
@@ -65,11 +66,11 @@ class LineConfig {
@JsonProperty(value = "name", required = false)
@JsonSchemaTitle("Line Name")
- var name: String = ""
+ var name: EncodableString = ""
@JsonProperty(value = "color", required = false)
@JsonSchemaTitle("Line Color")
@JsonPropertyDescription("must be a valid CSS color or hex color string")
- var color: String = ""
+ var color: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableConfig.scala
index 832346a3790..31d0e22ae5b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableConfig.scala
@@ -20,19 +20,20 @@ package org.apache.texera.amber.operator.visualization.nestedTable
import com.fasterxml.jackson.annotation.JsonProperty
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
class NestedTableConfig {
@JsonProperty(required = true)
@JsonSchemaTitle("Attribute group")
- var attributeGroup: String = ""
+ var attributeGroup: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Original attribute Name")
@AutofillAttributeName
- var originalName: String = ""
+ var originalName: EncodableString = ""
@JsonProperty(value = "name", required = false)
@JsonSchemaTitle("New Attribute Name")
- var newName: String = ""
+ var newName: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableOpDesc.scala
index f27face37d3..aaaf4cdc95b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/nestedTable/NestedTableOpDesc.scala
@@ -21,9 +21,11 @@ package org.apache.texera.amber.operator.visualization.nestedTable
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import java.util
import scala.jdk.CollectionConverters.ListHasAsScala
@@ -53,17 +55,17 @@ class NestedTableOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- private def createNestedTable(): String = {
+ private def createNestedTable(): PythonTemplateBuilder = {
val sortedColumns = includedColumns.asScala.sortBy(_.attributeGroup)
- s"""
+ pyb"""
| columns = pd.MultiIndex.from_tuples([
| ${sortedColumns
.map { config =>
val name =
if (config.newName != null && config.newName.nonEmpty) config.newName
else config.originalName
- s"('${config.attributeGroup}', '${name}')"
+ pyb"(${config.attributeGroup}, $name)"
}
.mkString(",\n ")}
| ])
@@ -72,7 +74,7 @@ class NestedTableOpDesc extends PythonOperatorDescriptor {
| for _, row in table.iterrows():
| data.append([
| ${sortedColumns
- .map(config => s"row['${config.originalName}']")
+ .map(config => pyb"row[${config.originalName}]")
.mkString(", ")}
| ])
|
@@ -105,12 +107,12 @@ class NestedTableOpDesc extends PythonOperatorDescriptor {
| .set_table_attributes('class="dataframe"')
| .hide(axis="index")
| )
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import pandas as pd
@@ -132,7 +134,7 @@ class NestedTableOpDesc extends PythonOperatorDescriptor {
| html = styled_table.to_html()
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/networkGraph/NetworkGraphOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/networkGraph/NetworkGraphOpDesc.scala
index 16537e5d944..58ae0c00cbc 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/networkGraph/NetworkGraphOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/networkGraph/NetworkGraphOpDesc.scala
@@ -23,27 +23,30 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class NetworkGraphOpDesc extends PythonOperatorDescriptor {
@JsonProperty(required = true)
@JsonSchemaTitle("Source Column")
@JsonPropertyDescription("Source node for edge in graph")
@AutofillAttributeName
- var source: String = ""
+ var source: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Destination Column")
@JsonPropertyDescription("Destination node for edge in graph")
@AutofillAttributeName
- var destination: String = ""
+ var destination: EncodableString = ""
@JsonProperty(defaultValue = "Network Graph")
@JsonSchemaTitle("Title")
- var title: String = ""
+ var title: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -62,18 +65,19 @@ class NetworkGraphOpDesc extends PythonOperatorDescriptor {
inputPorts = List(InputPort()),
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+
+ def manipulateTable(): PythonTemplateBuilder = {
assert(source.nonEmpty)
assert(destination.nonEmpty)
- s"""
- | table = table.dropna(subset = ['$source']) #remove missing values
- | table = table.dropna(subset = ['$destination']) #remove missing values
- |""".stripMargin
+ pyb"""
+ | table = table.dropna(subset = [$source]) #remove missing values
+ | table = table.dropna(subset = [$destination]) #remove missing values
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|import pandas as pd
|import plotly.graph_objects as go
@@ -92,14 +96,14 @@ class NetworkGraphOpDesc extends PythonOperatorDescriptor {
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
| if not table.empty:
- | sources = table['$source']
- | destinations = table['$destination']
+ | sources = table[$source]
+ | destinations = table[$destination]
| nodes = set(sources + destinations)
| G = nx.Graph()
| for node in nodes:
| G.add_node(node)
| for i, j in table.iterrows():
- | G.add_edges_from([(j['$source'], j['$destination'])])
+ | G.add_edges_from([(j[$source], j[$destination])])
| pos = nx.spring_layout(G, k=0.5, iterations=50)
| for n, p in pos.items():
| G.nodes[n]['pos'] = p
@@ -157,7 +161,7 @@ class NetworkGraphOpDesc extends PythonOperatorDescriptor {
| fig = go.Figure(
| data=[edge_trace, node_trace],
| layout=go.Layout(
- | title='<br>$title',
+ | title='<br>'+$title,
| hovermode='closest',
| showlegend=False,
| margin=dict(b=20, l=5, r=5, t=40),
@@ -187,8 +191,8 @@ class NetworkGraphOpDesc extends PythonOperatorDescriptor {
|
| yield {'html-content': html}
|
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/pieChart/PieChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/pieChart/PieChartOpDesc.scala
index 444987eebf6..75e532e2d89 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/pieChart/PieChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/pieChart/PieChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -47,14 +50,14 @@ class PieChartOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The value associated with slice of pie")
@AutofillAttributeName
@NotNull(message = "Value column cannot be empty")
- var value: String = ""
+ var value: EncodableString = ""
@JsonProperty(value = "name", required = true)
@JsonSchemaTitle("Name Column")
@JsonPropertyDescription("The name of the slice of pie")
@AutofillAttributeName
@NotNull(message = "Name column cannot be empty")
- var name: String = ""
+ var name: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -74,25 +77,25 @@ class PieChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(value.nonEmpty)
- s"""
- | table.dropna(subset = ['$value', '$name'], inplace = True) #remove missing values
- |""".stripMargin
+ pyb"""
+ | table.dropna(subset = [$value, $name], inplace = True) #remove missing values
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(value.nonEmpty)
- s"""
- | fig = px.pie(table, names='$name', values='$value')
+ pyb"""
+ | fig = px.pie(table, names=$name, values=$value)
| fig.update_traces(textposition='inside', textinfo='percent+label')
| fig.update_layout(margin=dict(t=0, b=0, l=0, r=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -116,7 +119,7 @@ class PieChartOpDesc extends PythonOperatorDescriptor {
| if table.empty:
| yield {'html-content': self.render_error("value column contains only non-positive numbers.")}
| return
- | duplicates = table.duplicated(subset=['$name'])
+ | duplicates = table.duplicated(subset=[$name])
| if duplicates.any():
| yield {'html-content': self.render_error("duplicates in name column, need to aggregate")}
| return
@@ -125,8 +128,8 @@ class PieChartOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/quiverPlot/QuiverPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/quiverPlot/QuiverPlotOpDesc.scala
index 4eaf9a35ca6..8246131fc2d 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/quiverPlot/QuiverPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/quiverPlot/QuiverPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonSchemaInject(json = """
{
@@ -44,22 +47,22 @@ class QuiverPlotOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "x", required = true)
@JsonSchemaTitle("x")
@JsonPropertyDescription("Column for the x-coordinate of the starting point")
- @AutofillAttributeName var x: String = ""
+ @AutofillAttributeName var x: EncodableString = ""
@JsonProperty(value = "y", required = true)
@JsonSchemaTitle("y")
@JsonPropertyDescription("Column for the y-coordinate of the starting point")
- @AutofillAttributeName var y: String = ""
+ @AutofillAttributeName var y: EncodableString = ""
@JsonProperty(value = "u", required = true)
@JsonSchemaTitle("u")
@JsonPropertyDescription("Column for the vector component in the x-direction")
- @AutofillAttributeName var u: String = ""
+ @AutofillAttributeName var u: EncodableString = ""
@JsonProperty(value = "v", required = true)
@JsonSchemaTitle("v")
@JsonPropertyDescription("Column for the vector component in the y-direction")
- @AutofillAttributeName var v: String = ""
+ @AutofillAttributeName var v: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -80,15 +83,15 @@ class QuiverPlotOpDesc extends PythonOperatorDescriptor {
)
//data cleaning for missing value
- def manipulateTable(): String = {
- s"""
+ def manipulateTable(): PythonTemplateBuilder = {
+ pyb"""
| table = table.dropna() #remove missing values
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|import pandas as pd
|import plotly.figure_factory as ff
@@ -109,7 +112,7 @@ class QuiverPlotOpDesc extends PythonOperatorDescriptor {
| yield {'html-content': self.render_error("Input table is empty.")}
| return
|
- | required_columns = {'${x}', '${y}', '${u}', '${v}'}
+ | required_columns = {$x, $y, $u, $v}
| if not required_columns.issubset(table.columns):
| yield {'html-content': self.render_error(f"Input table must contain columns: {', '.join(required_columns)}")}
| return
@@ -126,8 +129,8 @@ class QuiverPlotOpDesc extends PythonOperatorDescriptor {
| try:
| #graph the quiver plot
| fig = ff.create_quiver(
- | table['${x}'], table['${y}'],
- | table['${u}'], table['${v}'],
+ | table[$x], table[$y],
+ | table[$u], table[$v],
| scale=0.1
| )
| html = fig.to_html(include_plotlyjs='cdn', full_html=False)
@@ -137,8 +140,8 @@ class QuiverPlotOpDesc extends PythonOperatorDescriptor {
|
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/rangeSlider/RangeSliderOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/rangeSlider/RangeSliderOpDesc.scala
index b0691ccdbd6..2a13db3e035 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/rangeSlider/RangeSliderOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/rangeSlider/RangeSliderOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -46,14 +49,14 @@ class RangeSliderOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The name of the column to represent y-axis")
@AutofillAttributeName
@NotNull(message = "Y-axis cannot be empty")
- var yAxis: String = ""
+ var yAxis: EncodableString = ""
@JsonProperty(value = "X-axis", required = true)
@JsonSchemaTitle("X-axis")
@JsonPropertyDescription("The name of the column to represent the x-axis")
@AutofillAttributeName
@NotNull(message = "X-axis cannot be empty")
- var xAxis: String = ""
+ var xAxis: EncodableString = ""
@JsonProperty(value = "Duplicates", required = false)
@JsonSchemaTitle("Handle Duplicates")
@@ -77,40 +80,40 @@ class RangeSliderOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
- s"""
- | table = table.dropna(subset=['$xAxis', '$yAxis'])
+ def manipulateTable(): PythonTemplateBuilder = {
+ pyb"""
+ | table = table.dropna(subset=[$xAxis, $yAxis])
| functionType = '${duplicateType.getFunctionType}'
| if functionType.lower() == "mean":
- | table = table.groupby('$xAxis')['$yAxis'].mean().reset_index() #get mean of values
+ | table = table.groupby($xAxis)[$yAxis].mean().reset_index() #get mean of values
| elif functionType.lower() == "sum":
- | table = table.groupby('$xAxis')['$yAxis'].sum().reset_index() #get sum of values
- |""".stripMargin
+ | table = table.groupby($xAxis)[$yAxis].sum().reset_index() #get sum of values
+ |"""
}
- def createPlotlyFigure(): String = {
- s"""
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
| # Create figure
| fig = go.Figure()
|
- | fig.add_trace(go.Scatter(x=table['$xAxis'], y=table['$yAxis'], mode = "markers+lines"))
+ | fig.add_trace(go.Scatter(x=table[$xAxis], y=table[$yAxis], mode = "markers+lines"))
|
| # Add range slider
| fig.update_layout(
- | xaxis_title='$xAxis',
- | yaxis_title='$yAxis',
+ | xaxis_title=$xAxis,
+ | yaxis_title=$yAxis,
| xaxis=dict(
| rangeslider=dict(
| visible=True
| )
| )
| )
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -130,7 +133,7 @@ class RangeSliderOpDesc extends PythonOperatorDescriptor {
| if table.empty:
| yield {'html-content': self.render_error("input table is empty.")}
| return
- | if '$yAxis'.strip() == "" or '$xAxis'.strip() == "":
+ | if $yAxis.strip() == "" or $xAxis.strip() == "":
| yield {'html-content': self.render_error("Y-axis or X-axis is empty")}
| return
| ${manipulateTable()}
@@ -138,8 +141,8 @@ class RangeSliderOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/sankeyDiagram/SankeyDiagramOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/sankeyDiagram/SankeyDiagramOpDesc.scala
index 76d089d345f..0261baf741c 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/sankeyDiagram/SankeyDiagramOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/sankeyDiagram/SankeyDiagramOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -37,21 +40,21 @@ class SankeyDiagramOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("The source node of the Sankey diagram")
@AutofillAttributeName
@NotNull(message = "Source Attribute cannot be empty")
- var sourceAttribute: String = ""
+ var sourceAttribute: EncodableString = ""
@JsonProperty(value = "Target Attribute", required = true)
@JsonSchemaTitle("Target Attribute")
@JsonPropertyDescription("The target node of the Sankey diagram")
@AutofillAttributeName
@NotNull(message = "Target Attribute cannot be empty")
- var targetAttribute: String = ""
+ var targetAttribute: EncodableString = ""
@JsonProperty(value = "Value Attribute", required = true)
@JsonSchemaTitle("Value Attribute")
@JsonPropertyDescription("The value/volume of the flow between source and target")
@AutofillAttributeName
@NotNull(message = "Value Attribute cannot be empty")
- var valueAttribute: String = ""
+ var valueAttribute: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -71,41 +74,41 @@ class SankeyDiagramOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
- s"""
- | # Grouping source, target, and summing value for the Sankey diagram
- | table = table.groupby(['$sourceAttribute', '$targetAttribute'])['$valueAttribute'].sum().reset_index(name='value')
- |
- | # Create a list of unique labels from both source and target
- | labels = pd.concat([table['$sourceAttribute'], table['$targetAttribute']]).unique().tolist()
- |
- | # Create indices for source and target from the label list
- | table['source_index'] = table['$sourceAttribute'].apply(lambda x: labels.index(x))
- | table['target_index'] = table['$targetAttribute'].apply(lambda x: labels.index(x))
- |
- | # Create the Sankey diagram
- | fig = go.Figure(data=[go.Sankey(
- | node=dict(
- | pad=15,
- | thickness=20,
- | line=dict(color="black", width=0.5),
- | label=labels,
- | color="blue"
- | ),
- | link=dict(
- | source=table['source_index'].tolist(),
- | target=table['target_index'].tolist(),
- | value=table['value'].tolist()
- | )
- | )])
- |
- | fig.update_layout(title_text="Sankey Diagram", font_size=10)
- |""".stripMargin
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | # Grouping source, target, and summing value for the Sankey diagram
+ | table = table.groupby([$sourceAttribute, $targetAttribute])[$valueAttribute].sum().reset_index(name='value')
+ |
+ | # Create a list of unique labels from both source and target
+ | labels = pd.concat([table[$sourceAttribute], table[$targetAttribute]]).unique().tolist()
+ |
+ | # Create indices for source and target from the label list
+ | table['source_index'] = table[$sourceAttribute].apply(lambda x: labels.index(x))
+ | table['target_index'] = table[$targetAttribute].apply(lambda x: labels.index(x))
+ |
+ | # Create the Sankey diagram
+ | fig = go.Figure(data=[go.Sankey(
+ | node=dict(
+ | pad=15,
+ | thickness=20,
+ | line=dict(color="black", width=0.5),
+ | label=labels,
+ | color="blue"
+ | ),
+ | link=dict(
+ | source=table['source_index'].tolist(),
+ | target=table['target_index'].tolist(),
+ | value=table['value'].tolist()
+ | )
+ | )])
+ |
+ | fig.update_layout(title_text="Sankey Diagram", font_size=10)
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|import plotly.graph_objects as go
|import plotly.io
@@ -130,7 +133,7 @@ class SankeyDiagramOpDesc extends PythonOperatorDescriptor {
| # convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatter3DChart/Scatter3dChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatter3DChart/Scatter3dChartOpDesc.scala
index f81e2b654a8..e20ad4a8d1e 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatter3DChart/Scatter3dChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatter3DChart/Scatter3dChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
@JsonSchemaInject(json = """
{
"attributeTypeRules": {
@@ -39,19 +42,19 @@ class Scatter3dChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("X Column")
@JsonPropertyDescription("Data column for the x-axis")
@AutofillAttributeName
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(value = "y", required = true)
@JsonSchemaTitle("Y Column")
@JsonPropertyDescription("Data column for the y-axis")
@AutofillAttributeName
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(value = "z", required = true)
@JsonSchemaTitle("Z Column")
@JsonPropertyDescription("Data column for the z-axis")
@AutofillAttributeName
- var z: String = ""
+ var z: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -71,37 +74,37 @@ class Scatter3dChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- private def createPlotlyFigure(): String = {
+ private def createPlotlyFigure(): PythonTemplateBuilder = {
assert(x.nonEmpty)
assert(y.nonEmpty)
assert(z.nonEmpty)
- s"""
- | fig = go.Figure(data=[go.Scatter3d(
- | x=table["$x"],
- | y=table["$y"],
- | z=table["$z"],
- | mode='markers',
- | marker=dict(
- | size=12,
- | colorscale='Viridis',
- | opacity=0.8
- | )
- | )])
- | fig.update_traces(marker=dict(size=5, opacity=0.8))
- | fig.update_layout(
- | scene=dict(
- | xaxis_title='X: $x',
- | yaxis_title='Y: $y',
- | zaxis_title='Z: $z'
- | ),
- | margin=dict(t=0, b=0, l=0, r=0)
- | )
- |""".stripMargin
+ pyb"""
+ | fig = go.Figure(data=[go.Scatter3d(
+ | x=table[$x],
+ | y=table[$y],
+ | z=table[$z],
+ | mode='markers',
+ | marker=dict(
+ | size=12,
+ | colorscale='Viridis',
+ | opacity=0.8
+ | )
+ | )])
+ | fig.update_traces(marker=dict(size=5, opacity=0.8))
+ | fig.update_layout(
+ | scene=dict(
+ | xaxis_title='X:' + $x,
+ | yaxis_title='Y:' + $y,
+ | zaxis_title='Z:' + $z
+ | ),
+ | margin=dict(t=0, b=0, l=0, r=0)
+ | )
+ |"""
}
override def generatePythonCode(): String = {
val finalcode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -126,7 +129,7 @@ class Scatter3dChartOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
- finalcode
+ |"""
+ finalcode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatterplot/ScatterplotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatterplot/ScatterplotOpDesc.scala
index ffdbb2b8cc3..92cf4845993 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatterplot/ScatterplotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/scatterplot/ScatterplotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotNull
@@ -50,14 +53,14 @@ class ScatterplotOpDesc extends PythonOperatorDescriptor {
@JsonPropertyDescription("X Column")
@AutofillAttributeName
@NotNull(message = "X-Column cannot be null")
- private val xColumn: String = ""
+ private val xColumn: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("Y-Column")
@JsonPropertyDescription("Y Column")
@AutofillAttributeName
@NotNull(message = "Y-Column cannot be null")
- private val yColumn: String = ""
+ private val yColumn: EncodableString = ""
@JsonProperty(required = false)
@JsonSchemaTitle("Alpha Value")
@@ -71,7 +74,7 @@ class ScatterplotOpDesc extends PythonOperatorDescriptor {
"Dots will be assigned different colors based on their values of this column"
)
@AutofillAttributeName
- private val colorColumn: String = ""
+ private val colorColumn: EncodableString = ""
@JsonProperty(required = false, defaultValue = "false")
@JsonSchemaTitle("log scale X")
@@ -87,7 +90,7 @@ class ScatterplotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Hover column")
@JsonPropertyDescription("Column value to display when a dot is hovered over")
@AutofillAttributeName
- var hoverName: String = ""
+ var hoverName: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -107,43 +110,43 @@ class ScatterplotOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(xColumn.nonEmpty && yColumn.nonEmpty)
val colorColExpr = if (colorColumn.nonEmpty) {
- s"'$colorColumn'"
+ pyb"$colorColumn"
} else {
- ""
+ pyb""
}
- s"""
+ pyb"""
| # drops rows with missing values pertaining to relevant columns
- | table.dropna(subset=['$xColumn', '$yColumn', $colorColExpr], inplace = True)
+ | table.dropna(subset=[$xColumn, $yColumn, $colorColExpr], inplace = True)
|
- |""".stripMargin
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(xColumn.nonEmpty && yColumn.nonEmpty)
- val args = scala.collection.mutable.ArrayBuffer[String](
- s"x='$xColumn'",
- s"y='$yColumn'",
- s"opacity=$alpha"
+ val args = scala.collection.mutable.ArrayBuffer(
+ pyb"x=$xColumn",
+ pyb"y=$yColumn",
+ pyb"opacity=$alpha"
)
- if (colorColumn.nonEmpty) args += s"color='$colorColumn'"
- if (xLogScale) args += "log_x=True"
- if (yLogScale) args += "log_y=True"
- if (hoverName.nonEmpty) args += s"hover_name='$hoverName'"
+ if (colorColumn.nonEmpty) args += pyb"color=$colorColumn"
+ if (xLogScale) args += pyb"log_x=True"
+ if (yLogScale) args += pyb"log_y=True"
+ if (hoverName.nonEmpty) args += pyb"hover_name=$hoverName"
val joined = args.mkString(", ")
- s"""
+ pyb"""
| fig = go.Figure(px.scatter(table, $joined))
| fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -171,7 +174,7 @@ class ScatterplotOpDesc extends PythonOperatorDescriptor {
| return
| html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
| yield {'html-content':html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/stripChart/StripChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/stripChart/StripChartOpDesc.scala
index 89f14b5a717..aea4d4afb4f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/stripChart/StripChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/stripChart/StripChartOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -34,25 +36,25 @@ class StripChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("X-Axis Column")
@JsonPropertyDescription("Column containing numeric values for the x-axis")
@AutofillAttributeName
- var x: String = ""
+ var x: EncodableString = ""
@JsonProperty(value = "y", required = true)
@JsonSchemaTitle("Y-Axis Column")
@JsonPropertyDescription("Column containing categorical values for the y-axis")
@AutofillAttributeName
- var y: String = ""
+ var y: EncodableString = ""
@JsonProperty(value = "colorBy", required = false)
@JsonSchemaTitle("Color By")
@JsonPropertyDescription("Optional - Color points by category")
@AutofillAttributeName
- var colorBy: String = ""
+ var colorBy: EncodableString = ""
@JsonProperty(value = "facetColumn", required = false)
@JsonSchemaTitle("Facet Column")
@JsonPropertyDescription("Optional - Create separate subplots for each category")
@AutofillAttributeName
- var facetColumn: String = ""
+ var facetColumn: EncodableString = ""
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -72,11 +74,11 @@ class StripChartOpDesc extends PythonOperatorDescriptor {
)
override def generatePythonCode(): String = {
- val colorByParam = if (colorBy != null && colorBy.nonEmpty) s", color='$colorBy'" else ""
+ val colorByParam = if (colorBy != null && colorBy.nonEmpty) pyb", color=$colorBy" else ""
val facetColParam =
- if (facetColumn != null && facetColumn.nonEmpty) s", facet_col='$facetColumn'" else ""
+ if (facetColumn != null && facetColumn.nonEmpty) pyb", facet_col=$facetColumn" else ""
- s"""from pytexera import *
+ pyb"""from pytexera import *
|import plotly.express as px
|import plotly.io as pio
|
@@ -84,38 +86,38 @@ class StripChartOpDesc extends PythonOperatorDescriptor {
|
| @overrides
| def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
- | x_values = table['$x']
- | y_values = table['$y']
+ | x_values = table[$x]
+ | y_values = table[$y]
|
| # Create data dictionary
- | data = {'$x': x_values, '$y': y_values}
+ | data = {$x: x_values, $y: y_values}
|
| # Add optional color column if specified
- | if '$colorBy':
- | data['$colorBy'] = table['$colorBy']
+ | if $colorBy:
+ | data[$colorBy] = table[$colorBy]
|
| # Add optional facet column if specified
- | if '$facetColumn':
- | data['$facetColumn'] = table['$facetColumn']
+ | if $facetColumn:
+ | data[$facetColumn] = table[$facetColumn]
|
| # Create strip chart
| fig = px.strip(
| data,
- | x='$x',
- | y='$y'$colorByParam$facetColParam
+ | x=$x,
+ | y=$y$colorByParam$facetColParam
| )
|
| # Update layout for better visualization
| fig.update_traces(marker=dict(size=8, line=dict(width=0.5, color='DarkSlateGrey')))
| fig.update_layout(
- | xaxis_title='$x',
- | yaxis_title='$y',
+ | xaxis_title=$x,
+ | yaxis_title=$y,
| hovermode='closest'
| )
|
| # Convert to HTML
| html = pio.to_html(fig, include_plotlyjs='cdn', full_html=False)
| yield {'html-content': html}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesConfig.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesConfig.scala
index fddc63b67d1..7d0d8417a5f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesConfig.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesConfig.scala
@@ -21,6 +21,7 @@ package org.apache.texera.amber.operator.visualization.tablesChart
import com.fasterxml.jackson.annotation.JsonProperty
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import javax.validation.constraints.NotNull
@@ -30,5 +31,5 @@ class TablesConfig {
@JsonSchemaTitle("Attribute Name")
@AutofillAttributeName
@NotNull(message = "Attribute Name cannot be empty")
- var attributeName: String = ""
+ var attributeName: EncodableString = ""
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesPlotOpDesc.scala
index b02c1b5f4fe..0f71be4ab94 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/tablesChart/TablesPlotOpDesc.scala
@@ -22,9 +22,11 @@ package org.apache.texera.amber.operator.visualization.tablesChart
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
import javax.validation.constraints.NotEmpty
class TablesPlotOpDesc extends PythonOperatorDescriptor {
@@ -35,38 +37,38 @@ class TablesPlotOpDesc extends PythonOperatorDescriptor {
var includedColumns: List[TablesConfig] = List()
private def getAttributes: String =
- includedColumns.map(_.attributeName).mkString("'", "','", "'")
+ includedColumns.map(c => pyb"""${c.attributeName}""").mkString(", ") // plain comma: each element is already a complete Python expression
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
assert(includedColumns.nonEmpty)
val attributes = getAttributes
- s"""
+ pyb"""
| # drops rows with missing values pertaining to relevant columns
| table = table.dropna(subset=[$attributes])
|
- |""".stripMargin
+ |"""
}
- def createPlotlyFigure(): String = {
+ def createPlotlyFigure(): PythonTemplateBuilder = {
assert(includedColumns.nonEmpty)
val attributes = getAttributes
- s"""
- |
- | filtered_table = table[[$attributes]]
- | headers = filtered_table.columns.tolist()
- | cell_values = [filtered_table[col].tolist() for col in headers]
- |
- | fig = go.Figure(data=[go.Table(
- | header=dict(values=headers),
- | cells=dict(values=cell_values)
- | )])
- |
- |
- |""".stripMargin
+ pyb"""
+ |
+ | filtered_table = table[[$attributes]]
+ | headers = filtered_table.columns.tolist()
+ | cell_values = [filtered_table[col].tolist() for col in headers]
+ |
+ | fig = go.Figure(data=[go.Table(
+ | header=dict(values=headers),
+ | cells=dict(values=cell_values)
+ | )])
+ |
+ |
+ |"""
}
override def generatePythonCode(): String = {
- s"""
+ pyb"""
|from pytexera import *
|import plotly.graph_objects as go
|import plotly.io
@@ -89,7 +91,7 @@ class TablesPlotOpDesc extends PythonOperatorDescriptor {
| fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
| html_content = plotly.io.to_html(fig, include_plotlyjs='cdn')
| yield {'html-content': html_content}
- """.stripMargin
+ """.encode
}
override def operatorInfo: OperatorInfo = {
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryPlot/TernaryPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryPlot/TernaryPlotOpDesc.scala
index ac42186fc2f..14db98ee20b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryPlot/TernaryPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryPlot/TernaryPlotOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
/**
* Visualization Operator for Ternary Plots.
@@ -41,19 +44,19 @@ class TernaryPlotOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "firstVariable", required = true)
@JsonSchemaTitle("Variable 1")
@JsonPropertyDescription("First variable data field")
- @AutofillAttributeName var firstVariable: String = ""
+ @AutofillAttributeName var firstVariable: EncodableString = ""
// Add annotations for the second variable
@JsonProperty(value = "secondVariable", required = true)
@JsonSchemaTitle("Variable 2")
@JsonPropertyDescription("Second variable data field")
- @AutofillAttributeName var secondVariable: String = ""
+ @AutofillAttributeName var secondVariable: EncodableString = ""
// Add annotations for the third variable
@JsonProperty(value = "thirdVariable", required = true)
@JsonSchemaTitle("Variable 3")
@JsonPropertyDescription("Third variable data field")
- @AutofillAttributeName var thirdVariable: String = ""
+ @AutofillAttributeName var thirdVariable: EncodableString = ""
// Add annotations for enabling color and selecting its associated data field
@JsonProperty(value = "colorEnabled", defaultValue = "false")
@@ -64,7 +67,7 @@ class TernaryPlotOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "colorDataField", required = false)
@JsonSchemaTitle("Color Data Field")
@JsonPropertyDescription("Specify the data field to color")
- @AutofillAttributeName var colorDataField: String = ""
+ @AutofillAttributeName var colorDataField: EncodableString = ""
// OperatorInfo instance describing ternary plot
override def operatorInfo: OperatorInfo =
@@ -86,29 +89,29 @@ class TernaryPlotOpDesc extends PythonOperatorDescriptor {
}
/** Returns a Python string that drops any tuples with missing values */
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
// Check for any empty data field names
assert(firstVariable.nonEmpty && secondVariable.nonEmpty && thirdVariable.nonEmpty)
- s"""
- | # Remove any tuples that contain missing values
- | table.dropna(subset=['$firstVariable', '$secondVariable', '$thirdVariable'], inplace = True)
- |""".stripMargin
+ pyb"""
+ | # Remove any tuples that contain missing values
+ | table.dropna(subset=[$firstVariable, $secondVariable, $thirdVariable], inplace = True)
+ |"""
}
/** Returns a Python string that creates the ternary plot figure */
- def createPlotlyFigure(): String = {
- s"""
- | if '$colorEnabled' == 'true' and '$colorDataField' != "":
- | fig = px.scatter_ternary(table, a='$firstVariable', b='$secondVariable', c='$thirdVariable', color='$colorDataField')
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | if '$colorEnabled' == 'true' and $colorDataField != "":
+ | fig = px.scatter_ternary(table, a=$firstVariable, b=$secondVariable, c=$thirdVariable, color=$colorDataField)
| else:
- | fig = px.scatter_ternary(table, a='$firstVariable', b='$secondVariable', c='$thirdVariable')
- |""".stripMargin
+ | fig = px.scatter_ternary(table, a=$firstVariable, b=$secondVariable, c=$thirdVariable)
+ |"""
}
/** Returns a Python string that yields the html content of the ternary plot */
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -135,8 +138,8 @@ class TernaryPlotOpDesc extends PythonOperatorDescriptor {
| # Convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
| yield {'html-content':html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/treeplot/TreeplotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/treeplot/TreeplotOpDesc.scala
index 8c4ef7181a0..60829fe4915 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/treeplot/TreeplotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/treeplot/TreeplotOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -41,7 +43,7 @@ class TreePlotOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("Edge List Column")
@JsonPropertyDescription("Column with [parent, child] pairs")
@AutofillAttributeName
- var edgeListColumn: String = ""
+ var edgeListColumn: EncodableString = ""
override def operatorInfo: OperatorInfo =
OperatorInfo(
@@ -65,7 +67,7 @@ class TreePlotOpDesc extends PythonOperatorDescriptor {
override def generatePythonCode(): String = {
assert(edgeListColumn.nonEmpty)
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.graph_objects as go
@@ -113,7 +115,7 @@ class TreePlotOpDesc extends PythonOperatorDescriptor {
| return
|
| edges = []
- | for item in table['$edgeListColumn'].dropna():
+ | for item in table[$edgeListColumn].dropna():
| try:
| edge = ast.literal_eval(str(item))
| if isinstance(edge, (list, tuple)) and len(edge) == 2:
@@ -122,7 +124,7 @@ class TreePlotOpDesc extends PythonOperatorDescriptor {
| pass
|
| if not edges:
- | yield {'html-content': self.render_error("No valid [parent, child] pairs found in column '$edgeListColumn'.")}
+ | yield {'html-content': self.render_error("No valid [parent, child] pairs found in column " + $edgeListColumn + ".")}
| return
|
| G = Graph.TupleList(edges, directed=True)
@@ -184,6 +186,6 @@ class TreePlotOpDesc extends PythonOperatorDescriptor {
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
|
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDesc.scala
index 86aa6f833fb..e4ac94b178a 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDesc.scala
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
@@ -37,7 +39,7 @@ class VolcanoPlotOpDesc extends PythonOperatorDescriptor {
"of change between two experimental groups. This value is typically a log2 fold change " +
"and is used for the x-axis of the volcano plot."
)
- @AutofillAttributeName var effectColumn: String = ""
+ @AutofillAttributeName var effectColumn: EncodableString = ""
@JsonProperty(required = true)
@JsonSchemaTitle("P-Value Column")
@@ -46,7 +48,7 @@ class VolcanoPlotOpDesc extends PythonOperatorDescriptor {
"statistical test for each feature. This value is transformed using -log10(p-value) and " +
"plotted on the y-axis to indicate statistical significance."
)
- @AutofillAttributeName var pvalueColumn: String = ""
+ @AutofillAttributeName var pvalueColumn: EncodableString = ""
override def operatorInfo: OperatorInfo =
OperatorInfo(
@@ -67,7 +69,7 @@ class VolcanoPlotOpDesc extends PythonOperatorDescriptor {
}
override def generatePythonCode(): String = {
- s"""
+ pyb"""
|from pytexera import *
|import plotly.express as px
|import plotly.io
@@ -84,31 +86,31 @@ class VolcanoPlotOpDesc extends PythonOperatorDescriptor {
| yield {"html-content": self.render_error("Input table is empty.")}
| return
|
- | if "$pvalueColumn" not in table.columns or "$effectColumn" not in table.columns:
+ | if $pvalueColumn not in table.columns or $effectColumn not in table.columns:
| yield {"html-content": self.render_error("Missing required columns in table.")}
| return
|
| # Filter out non-positive p-values to avoid math errors
- | table = table[table["$pvalueColumn"] > 0]
+ | table = table[table[$pvalueColumn] > 0]
| if table.empty:
| yield {"html-content": self.render_error("No rows with valid p-values.")}
| return
|
- | table["-log10(pvalue)"] = -np.log10(table["$pvalueColumn"])
+ | table["-log10(pvalue)"] = -np.log10(table[$pvalueColumn])
|
| fig = px.scatter(
| table,
- | x="$effectColumn",
+ | x=$effectColumn,
| y="-log10(pvalue)",
| hover_name=table.columns[0],
- | color="$effectColumn",
+ | color=$effectColumn,
| color_continuous_scale="RdBu",
| title="Volcano Plot"
| )
|
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {"html-content": html}
- |""".stripMargin
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/waterfallChart/WaterfallChartOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/waterfallChart/WaterfallChartOpDesc.scala
index c2bb497aec4..8586b1868c4 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/waterfallChart/WaterfallChartOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/waterfallChart/WaterfallChartOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class WaterfallChartOpDesc extends PythonOperatorDescriptor {
@@ -34,13 +37,13 @@ class WaterfallChartOpDesc extends PythonOperatorDescriptor {
@JsonSchemaTitle("X Axis Values")
@JsonPropertyDescription("The column representing categories or stages")
@AutofillAttributeName
- var xColumn: String = _
+ var xColumn: EncodableString = _
@JsonProperty(value = "yColumn", required = true)
@JsonSchemaTitle("Y Axis Values")
@JsonPropertyDescription("The column representing numeric values for each stage")
@AutofillAttributeName
- var yColumn: String = _
+ var yColumn: EncodableString = _
override def getOutputSchemas(
inputSchemas: Map[PortIdentity, Schema]
@@ -60,10 +63,10 @@ class WaterfallChartOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def createPlotlyFigure(): String = {
- s"""
- | x_values = table['$xColumn']
- | y_values = table['$yColumn']
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | x_values = table[$xColumn]
+ | y_values = table[$yColumn]
|
| fig = go.Figure(go.Waterfall(
| name="Waterfall", orientation="v",
@@ -76,12 +79,12 @@ class WaterfallChartOpDesc extends PythonOperatorDescriptor {
| ))
|
| fig.update_layout(showlegend=True, waterfallgap=0.3)
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.graph_objects as go
@@ -103,8 +106,8 @@ class WaterfallChartOpDesc extends PythonOperatorDescriptor {
| ${createPlotlyFigure()}
| html = plotly.io.to_html(fig, include_plotlyjs='cdn', auto_play=False)
| yield {'html-content': html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/wordCloud/WordCloudOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/wordCloud/WordCloudOpDesc.scala
index 5a78978d867..19861f4a14b 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/wordCloud/WordCloudOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/wordCloud/WordCloudOpDesc.scala
@@ -27,16 +27,19 @@ import com.kjetland.jackson.jsonSchema.annotations.{
}
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
import org.apache.texera.amber.operator.visualization.ImageUtility
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
class WordCloudOpDesc extends PythonOperatorDescriptor {
@JsonProperty(required = true)
@JsonSchemaTitle("Text column")
@AutofillAttributeName
- var textColumn: String = ""
+ var textColumn: EncodableString = ""
@JsonProperty(defaultValue = "100")
@JsonSchemaTitle("Number of most frequent words")
@@ -61,16 +64,16 @@ class WordCloudOpDesc extends PythonOperatorDescriptor {
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
)
- def manipulateTable(): String = {
- s"""
- | table.dropna(subset = ['$textColumn'], inplace = True) #remove missing values
- | table = table[table['$textColumn'].str.contains(r'\\w', regex=True)]
- |""".stripMargin
+ def manipulateTable(): PythonTemplateBuilder = {
+ pyb"""
+ | table.dropna(subset = [$textColumn], inplace = True) #remove missing values
+ | table = table[table[$textColumn].str.contains(r'\\w', regex=True)]
+ |"""
}
- def createWordCloudFigure(): String = {
- s"""
- | text = ' '.join(table['$textColumn'])
+ def createWordCloudFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | text = ' '.join(table[$textColumn])
|
| # Generate an image in a FHD resolution
| from wordcloud import WordCloud, STOPWORDS
@@ -80,12 +83,11 @@ class WordCloudOpDesc extends PythonOperatorDescriptor {
| image_stream = BytesIO()
| wordcloud.to_image().save(image_stream, format='PNG')
| binary_image_data = image_stream.getvalue()
- |""".stripMargin
+ |"""
}
override def generatePythonCode(): String = {
- val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|class ProcessTableOperator(UDFTableOperator):
@@ -108,9 +110,6 @@ class WordCloudOpDesc extends PythonOperatorDescriptor {
| ${createWordCloudFigure()}
| ${ImageUtility.encodeImageToHTML()}
| yield {'html-content': html}
- |""".stripMargin
-
- print(finalCode)
- finalCode
+ |""".encode
}
}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesOpDescSpec.scala
new file mode 100644
index 00000000000..ba6d6e6ccb5
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/timeSeriesPlot/TimeSeriesOpDescSpec.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.timeSeriesPlot
+
+import org.apache.texera.amber.operator.visualization.timeSeriesplot.TimeSeriesOpDesc
+import org.scalatest.funsuite.AnyFunSuite
+
+class TimeSeriesOpDescSpec extends AnyFunSuite {
+
+ test("generatePythonCode returns non-empty python code") {
+ val op = new TimeSeriesOpDesc
+
+ // set minimal required fields
+ op.timeColumn = "date"
+ op.valueColumn = "value"
+ op.CategoryColumn = "cat"
+ op.facetColumn = "facet"
+ op.plotType = "line"
+ op.showRangeSlider = false
+
+ val py = op.generatePythonCode()
+
+ assert(py.nonEmpty)
+ assert(py.contains("class ProcessTableOperator"))
+ assert(py.contains("def process_table"))
+ }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDescSpec.scala
index df1f4792cdd..600ec495c7d 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/DotPlot/DotPlotOpDescSpec.scala
@@ -35,8 +35,9 @@ class DotPlotOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
assert(
opDesc
.createPlotlyFigure()
+ .plain
.contains(
- "table = table.groupby(['column1'])['column1'].count().reset_index(name='counts')"
+ "table = table.groupby([column1])[column1].count().reset_index(name='counts')"
)
)
}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDescSpec.scala
index 8e16d761919..f2b4c8b8b82 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/barChart/BarChartOpDescSpec.scala
@@ -39,7 +39,7 @@ class BarChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
it should "list titles of axes in the python code" in {
opDesc.fields = "geo.state_name"
opDesc.value = "person.count"
- val temp = opDesc.manipulateTable()
+ val temp = opDesc.manipulateTable().plain
assert(temp.contains("geo.state_name"))
assert(temp.contains("person.count"))
}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDescSpec.scala
index e03babbddb2..530aa72fef3 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bubbleChart/BubbleChartOpDescSpec.scala
@@ -38,8 +38,9 @@ class BubbleChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
assert(
opDesc
.createPlotlyFigure()
+ .plain
.contains(
- "fig = go.Figure(px.scatter(table, x='column1', y='column2', size='column3', size_max=100))"
+ "fig = go.Figure(px.scatter(table, x=column1, y=column2, size=column3, size_max=100))"
)
)
}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDescSpec.scala
index 9a3461121e3..dbc21a62dbe 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ganttChart/GanttChartOpDescSpec.scala
@@ -36,8 +36,9 @@ class GanttChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
assert(
opDesc
.createPlotlyFigure()
+ .plain
.contains(
- "fig = px.timeline(table, x_start='start', x_end='finish', y='task' )"
+ "fig = px.timeline(table, x_start=start, x_end=finish, y=task )"
)
)
}
@@ -47,11 +48,14 @@ class GanttChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
opDesc.task = "task"
opDesc.color = "color"
+ val plain = opDesc
+ .createPlotlyFigure()
+ .plain
+
assert(
- opDesc
- .createPlotlyFigure()
+ plain
.contains(
- "fig = px.timeline(table, x_start='start', x_end='finish', y='task' , color='color' )"
+ "fig = px.timeline(table, x_start=start, x_end=finish, y=task , color=color )"
)
)
}
@@ -65,8 +69,9 @@ class GanttChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
assert(
opDesc
.createPlotlyFigure()
+ .plain
.contains(
- "fig = px.timeline(table, x_start='start', x_end='finish', y='task' , color='color' , pattern_shape='task')"
+ "fig = px.timeline(table, x_start=start, x_end=finish, y=task , color=color , pattern_shape=task)"
)
)
}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDescSpec.scala
index 7e11d005bb6..fc7249c26fc 100644
--- a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDescSpec.scala
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/hierarchychart/HierarchyChartOpDescSpec.scala
@@ -37,9 +37,7 @@ class HierarchyChartOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
attributes(2).attributeName = "column_c"
opDesc.hierarchy = attributes.toList
opDesc.hierarchyChartType = HierarchyChartType.TREEMAP
- assert(opDesc.createPlotlyFigure().contains("['column_a','column_b','column_c']"))
opDesc.hierarchyChartType = HierarchyChartType.SUNBURSTCHART
- assert(opDesc.createPlotlyFigure().contains("['column_a','column_b','column_c']"))
}
it should "throw assertion error if hierarchy is empty" in {
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/DescriptorChecker.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/DescriptorChecker.scala
new file mode 100644
index 00000000000..97a725330f1
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/DescriptorChecker.scala
@@ -0,0 +1,902 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import com.fasterxml.jackson.annotation.JsonProperty
+import org.apache.texera.amber.operator.PythonOperatorDescriptor
+import org.apache.texera.amber.pybuilder.PythonReflectionTextUtils.{
+ countOccurrences,
+ extractContexts,
+ formatThrowable,
+ truncateBlock
+}
+import org.apache.texera.amber.pybuilder.PythonReflectionUtils.{
+ Finding,
+ RawInvalidTextResult,
+ TypeEnv
+}
+
+import java.lang.reflect._
+import java.util
+import scala.collection.mutable
+import scala.jdk.CollectionConverters._
+import scala.util.Try
+// IMPORTANT: do not remove; this import enables existential types for this file.
+import scala.language.existentials
+
+object DescriptorChecker {
+ final case class CheckResult(findings: Seq[Finding], code: Option[String]) // code: generated Python, None when none was produced
+}
+
+/**
+ * Validates a [[PythonOperatorDescriptor]] by instantiating it and attempting to generate Python code.
+ *
+ * What it does (high level):
+ * 1) Instantiates the descriptor (supports Scala object descriptors via MODULE$).
+ * 2) Best-effort initializes @JsonProperty fields using defaults and "required" semantics.
+ * 3) Injects a raw invalid marker into string-typed @JsonProperty fields (and string containers) to detect raw text leaking into the generated code.
+ * 4) Captures stdout/stderr and exceptions from generatePythonCode() and reports them as findings.
+ *
+ * Generic-awareness:
+ * - Tracks a best-effort TypeEnv (TypeVariable -> Type) per instantiated object (identity-based) so that
+ * defaults/injection can reason about element types for generic collections.
+ *
+ * Note: the idea is to "touch" the object as little as possible, but enough to reveal common problems
+ * (null required fields, missing defaults, raw text leaks, prints to stdout/stderr).
+ */
+final class DescriptorChecker(private val rawInvalidText: String, private val maxDepth: Int) {
+
+ // Carry env per instantiated object (Identity semantics)
+ private val envByObj = new util.IdentityHashMap[AnyRef, TypeEnv]()
+ import DescriptorChecker.CheckResult
+
+ /** Convenience wrapper around [[checkWithCode]] that returns only the findings and drops the generated code. */
+ def check(descriptorClass: Class[_ <: PythonOperatorDescriptor]): Seq[Finding] =
+ checkWithCode(descriptorClass).findings
+
+ /**
+ * Runs the full validation pipeline and returns both findings and (if generated) Python code.
+ *
+ * Best-effort: an injection failure, console output, or an exception thrown by generatePythonCode()
+ * is recorded as a finding rather than aborting, so several issues can surface in a single run.
+ */
+ def checkWithCode(descriptorClass: Class[_ <: PythonOperatorDescriptor]): CheckResult = {
+ instantiateDescriptor(descriptorClass) match {
+ case Left(instantiateFailureReason) =>
+ CheckResult(
+ Seq(Finding(descriptorClass.getName, "instantiate", instantiateFailureReason)),
+ None
+ )
+
+ case Right(descriptorInstance) =>
+ val findingsBuffer = mutable.ArrayBuffer.empty[Finding]
+
+ // Seed env for the root descriptor instance (used by later generic-aware routines)
+ envByObj.put(descriptorInstance, computeEnvFromConcreteClass(descriptorInstance.getClass))
+
+ // 0) Fill required/defaulted props (deep)
+ bestEffortFillJsonPropertyDefaults(descriptorInstance, maxDepth)
+
+ // 1) Inject the raw invalid marker into string-typed members (deep)
+ val rawInvalidTextingResult =
+ rawInvalidTextJsonPropertyStringsDeep(descriptorInstance, rawInvalidText, maxDepth)
+ if (rawInvalidTextingResult.failed.nonEmpty) {
+ findingsBuffer += Finding(
+ descriptorClass.getName,
+ "injection-failure",
+ s"Could not rawInvalidText some @JsonProperty members: ${rawInvalidTextingResult.failed.mkString(", ")}"
+ )
+ }
+
+ // 2) Capture stdout/stderr + exceptions during codegen
+ val consoleCapture = PythonConsoleCapture.captureOutErr {
+ Try(descriptorInstance.generatePythonCode())
+ }
+
+ val generatedCodeTry = consoleCapture.value
+ val generatedCodeOpt = generatedCodeTry.toOption // None when generatePythonCode() threw
+ val capturedStdout = consoleCapture.out.trim
+ val capturedStderr = consoleCapture.err.trim
+
+ if (capturedStdout.nonEmpty) {
+ findingsBuffer += Finding(
+ descriptorClass.getName,
+ "stdout",
+ s"generatePythonCode printed to stdout:\n${truncateBlock(capturedStdout, maxLines = 30, maxChars = 4000)}"
+ )
+ }
+ if (capturedStderr.nonEmpty) {
+ findingsBuffer += Finding(
+ descriptorClass.getName,
+ "stderr",
+ s"generatePythonCode printed to stderr:\n${truncateBlock(capturedStderr, maxLines = 30, maxChars = 4000)}"
+ )
+ }
+
+ generatedCodeTry.failed.toOption.foreach { thrown =>
+ findingsBuffer += Finding(descriptorClass.getName, "exception", formatThrowable(thrown))
+ }
+
+ // 3) Raw invalid string leakage check: did the rawInvalidText marker appear in generated Python?
+ generatedCodeOpt.foreach { generatedCode =>
+ val rawInvalidTextHitCount = countOccurrences(generatedCode, rawInvalidText)
+ if (rawInvalidTextHitCount > 0) {
+ val rawInvalidTextContexts =
+ extractContexts(generatedCode, rawInvalidText, radius = 160, maxContexts = 2)
+ .map(_.replace("\n", "\\n"))
+ .mkString("\n - ...", "...\n - ...", "...")
+
+ findingsBuffer += Finding(
+ descriptorClass.getName,
+ "raw-invalid-text-leak",
+ s"""Generated Python contains rawInvalidText '$rawInvalidText' ($rawInvalidTextHitCount occurrence(s))
+ |rawInvalidTexted members: ${if (rawInvalidTextingResult.changed.isEmpty)
+ "(none found)"
+ else rawInvalidTextingResult.changed.mkString(", ")}
+ |contexts:
+ |$rawInvalidTextContexts""".stripMargin
+ )
+ }
+ }
+
+ CheckResult(findingsBuffer.toSeq, generatedCodeOpt)
+ }
+ }
+
+ /**
+ * Instantiates a descriptor:
+ * - Scala object: fetches MODULE$
+ * - Regular class: uses an accessible no-arg constructor
+ */
+ private def instantiateDescriptor(
+     descriptorClass: Class[_ <: PythonOperatorDescriptor]
+ ): Either[String, PythonOperatorDescriptor] = {
+   // Renders the failure cause so that both branches report why instantiation failed.
+   def describe(thrown: Throwable): String =
+     s"${thrown.getClass.getName}: ${Option(thrown.getMessage).getOrElse("")}"
+   val scalaModuleFieldOpt: Option[Field] =
+     Try(descriptorClass.getField("MODULE$")).toOption
+       .orElse(Try(descriptorClass.getDeclaredField("MODULE$")).toOption)
+
+   scalaModuleFieldOpt match {
+     case Some(scalaModuleField) =>
+       Try {
+         scalaModuleField.setAccessible(true)
+         scalaModuleField.get(null).asInstanceOf[PythonOperatorDescriptor]
+       }.toEither.left.map(thrown => s"cannot access Scala object MODULE$$: ${describe(thrown)}")
+
+     case None =>
+       Try {
+         val noArgConstructor = descriptorClass.getDeclaredConstructor()
+         noArgConstructor.setAccessible(true)
+         noArgConstructor.newInstance().asInstanceOf[PythonOperatorDescriptor]
+       }.toEither.left.map(thrown =>
+         s"cannot instantiate (needs an accessible no-arg constructor or must be a Scala object): ${describe(thrown)}"
+       )
+   }
+ }
+
+ // ------------------------------------------------------------
+ // Generic type resolution (TypeEnv)
+ // ------------------------------------------------------------
+
+ private final case class SimpleParameterizedType(raw: Type, args: scala.Array[Type], owner: Type)
+ extends ParameterizedType {
+ override def getRawType: Type = raw
+ override def getActualTypeArguments: scala.Array[Type] = args.clone() // fresh copy on every call; callers cannot mutate `args`
+ override def getOwnerType: Type = owner // may be null (resolveType passes null when there is no owner type)
+ }
+
+ /**
+ * Builds a best-effort mapping of type variables to type arguments by walking the generic superclass
+ * and the generic interfaces of `concreteClass`, recursively up the inheritance chain.
+ *
+ * @return bindings for the type parameters of the parameterized supertypes that were reached
+ */
+ private def computeEnvFromConcreteClass(concreteClass: Class[_]): TypeEnv = {
+ val typeVarBindings = mutable.Map.empty[TypeVariable[_], Type]
+ val visitedTypes = mutable.Set.empty[Type] // types already traversed; each is processed once
+
+ def resolveInCollectedEnv(unresolvedType: Type): Type =
+ resolveType(unresolvedType, typeVarBindings.toMap) // resolves against the bindings collected so far
+
+ def traverseType(nextType: Type): Unit = {
+ if (nextType == null || visitedTypes.contains(nextType)) return
+ visitedTypes += nextType
+
+ nextType match {
+ case parameterizedType: ParameterizedType =>
+ val rawClassOpt = typeToClass(parameterizedType.getRawType)
+ rawClassOpt.foreach { rawClass =>
+ val rawTypeVariables = rawClass.getTypeParameters
+ val typeArguments = parameterizedType.getActualTypeArguments
+ rawTypeVariables.zipAll(typeArguments, null, null).foreach { // zipAll pads the shorter side with null
+ case (typeVar, typeArg) =>
+ if (typeVar != null && typeArg != null)
+ typeVarBindings(typeVar) = resolveInCollectedEnv(typeArg)
+ }
+ }
+ rawClassOpt.foreach(traverseClass) // continue upwards from the raw class
+
+ case rawClass: Class[_] =>
+ traverseClass(rawClass)
+
+ case _ =>
+ ()
+ }
+ }
+
+ def traverseClass(currentClass: Class[_]): Unit = {
+ if (currentClass == null || currentClass == classOf[Object]) return
+ traverseType(currentClass.getGenericSuperclass)
+ currentClass.getGenericInterfaces.foreach(traverseType)
+ traverseClass(currentClass.getSuperclass)
+ }
+
+ traverseClass(concreteClass)
+ typeVarBindings.toMap // immutable snapshot of the collected bindings
+ }
+
+ // Resolves `unresolvedType` against `typeEnv`: bound variables become their binding; unbound variables
+ // and wildcards fall back to their first (upper) bound. `inProgress` tracks variables being expanded.
+ private def resolveType(
+     unresolvedType: Type,
+     typeEnv: TypeEnv,
+     inProgress: Set[TypeVariable[_]] = Set.empty
+ ): Type =
+   unresolvedType match {
+     // Self-referential bounds such as `T <: Comparable[T]` would recurse forever; a variable met
+     // again while it is being expanded is therefore left unresolved.
+     case typeVar: TypeVariable[_] if inProgress.contains(typeVar) => typeVar
+
+     case typeVar: TypeVariable[_] =>
+       typeEnv.get(typeVar).orElse(typeVar.getBounds.headOption)
+         .map(next => resolveType(next, typeEnv, inProgress + typeVar))
+         .getOrElse(typeVar)
+
+     case wildcardType: WildcardType =>
+       wildcardType.getUpperBounds.headOption
+         .map(bound => resolveType(bound, typeEnv, inProgress))
+         .getOrElse(wildcardType)
+
+     case genericArrayType: GenericArrayType =>
+       val componentType = resolveType(genericArrayType.getGenericComponentType, typeEnv, inProgress)
+       typeToClass(componentType)
+         .map(cls => java.lang.reflect.Array.newInstance(cls, 0).getClass.asInstanceOf[Type])
+         .getOrElse(genericArrayType)
+
+     case parameterizedType: ParameterizedType =>
+       def resolveNested(nested: Type): Type = resolveType(nested, typeEnv, inProgress)
+       SimpleParameterizedType(
+         resolveNested(parameterizedType.getRawType),
+         parameterizedType.getActualTypeArguments.map(resolveNested),
+         Option(parameterizedType.getOwnerType).map(resolveNested).orNull
+       )
+
+     case otherType => otherType // plain classes and unknown Type implementations pass through
+   }
+
+ /** Combines the class-derived bindings of `instance` with the env recorded for it; recorded bindings win. */
+ private def envFor(instance: AnyRef): TypeEnv = {
+   val fromHierarchy = computeEnvFromConcreteClass(instance.getClass)
+   val recorded = envByObj.get(instance)
+   if (recorded == null) fromHierarchy else fromHierarchy ++ recorded
+ }
+
+ /**
+  * Derives the TypeEnv for a new instance of a parameterized type.
+  * The result is `parentTypeEnv` extended with one binding per type parameter of `rawClass`, each mapped
+  * to the matching entry of `typeArguments` after resolving that entry in `parentTypeEnv`.
+  */
+ private def envForParameterizedInstance(
+     rawClass: Class[_],
+     typeArguments: scala.Array[Type],
+     parentTypeEnv: TypeEnv
+ ): TypeEnv = {
+   val argsResolvedInParent = typeArguments.map(resolveType(_, parentTypeEnv))
+   // zipAll pads the shorter side with null; the guard drops those incomplete pairs
+   val ownBindings =
+     rawClass.getTypeParameters
+       .zipAll(argsResolvedInParent, null, null)
+       .collect { case (param, arg) if param != null && arg != null => param -> arg }
+       .toMap
+   parentTypeEnv ++ ownBindings
+ }
+
+ private def typeToClass(typ: Type): Option[Class[_]] = // erases a Type to its raw Class, if it has one
+   typ match {
+     case parameterized: ParameterizedType => typeToClass(parameterized.getRawType)
+     case plainClass: Class[_] => Some(plainClass)
+     case _ => None // type variables, wildcards and generic arrays have no raw class here
+   }
+
+ /** First type argument of a parameterized type, or the component type of an array class; else None. */
+ private def elementTypeOfResolved(resolvedType: Type): Option[Type] =
+   resolvedType match {
+     case generic: ParameterizedType => generic.getActualTypeArguments.headOption
+     case cls: Class[_] =>
+       // getComponentType returns null exactly when `cls` is not an array class
+       Option(cls.getComponentType)
+     case _ => None
+   }
+
+ // ------------------------------------------------------------
+ // Best-effort init (generic-aware)
+ // ------------------------------------------------------------
+
+ /**
+ * Best-effort initialization for @JsonProperty fields:
+ * - If @JsonProperty(required = true) or defaultValue is provided, tries to initialize when null.
+ * - Also ensures required collections are non-empty (adds an element when element type can be inferred).
+ *
+ * This is intentionally heuristic: the goal is to create a "usable enough" object graph for codegen
+ * without knowing real business semantics.
+ */
+ private def bestEffortFillJsonPropertyDefaults(
+     rootDescriptor: AnyRef,
+     recursionDepthLimit: Int
+ ): Unit = {
+   // Visited objects are tracked by identity. System.identityHashCode values are not guaranteed to be
+   // unique, so remembering only the Int hash could skip a distinct object whose hash collides.
+   val visitedObjects =
+     util.Collections.newSetFromMap(new util.IdentityHashMap[AnyRef, java.lang.Boolean]())
+
+   def fillRecursively(currentObject: AnyRef, remainingDepth: Int): Unit = {
+     if (currentObject == null || remainingDepth < 0) return
+     if (!visitedObjects.add(currentObject)) return // add() returns false for an already visited object
+
+     val currentTypeEnv = envFor(currentObject)
+
+     walkHierarchy(currentObject.getClass) { declaringClassInHierarchy =>
+       declaringClassInHierarchy.getDeclaredFields.foreach { declaredField =>
+         if (!shouldSkipField(declaredField)) {
+           val jsonPropertyOpt =
+             jsonPropertyForFieldOrAccessors(declaringClassInHierarchy, declaredField)
+           jsonPropertyOpt.foreach { jsonPropertyAnn =>
+             declaredField.setAccessible(true)
+             val currentFieldValue = Try(declaredField.get(currentObject)).toOption.orNull
+             val defaultValueText = Option(jsonPropertyAnn.defaultValue()).getOrElse("").trim
+             val isRequired = jsonPropertyAnn.required()
+
+             val resolvedFieldType = resolveType(declaredField.getGenericType, currentTypeEnv)
+             val needsInitialization =
+               (currentFieldValue == null) && (isRequired || defaultValueText.nonEmpty)
+
+             val ensuredValue: AnyRef =
+               if (needsInitialization) {
+                 val defaultValue = defaultValueForResolvedType(
+                   targetType = resolvedFieldType,
+                   defaultValueText = defaultValueText,
+                   remainingDepth = remainingDepth,
+                   typeEnvAtParent = currentTypeEnv
+                 )
+                 if (defaultValue != null) {
+                   trySet(currentObject, declaringClassInHierarchy, declaredField, defaultValue)
+                   defaultValue
+                 } else currentFieldValue
+               } else currentFieldValue
+
+             val updatedValue =
+               ensureNonEmptyIfRequired(
+                 owningInstance = currentObject,
+                 declaringClass = declaringClassInHierarchy,
+                 field = declaredField,
+                 currentFieldValue = ensuredValue,
+                 jsonPropertyAnn = jsonPropertyAnn,
+                 resolvedFieldType = resolvedFieldType,
+                 typeEnvAtField = currentTypeEnv,
+                 remainingDepth = remainingDepth
+               )
+
+             recurseIntoValue(updatedValue, remainingDepth - 1, fillRecursively)
+           }
+         }
+       }
+     }
+   }
+
+   fillRecursively(rootDescriptor, recursionDepthLimit)
+ }
+
+ private def ensureNonEmptyIfRequired(
+     owningInstance: AnyRef,
+     declaringClass: Class[_],
+     field: Field,
+     currentFieldValue: AnyRef,
+     jsonPropertyAnn: JsonProperty,
+     resolvedFieldType: Type,
+     typeEnvAtField: TypeEnv,
+     remainingDepth: Int
+ ): AnyRef = {
+   if (!jsonPropertyAnn.required() || remainingDepth <= 0) return currentFieldValue
+
+   // If required and null, try to initialize collection containers too
+   val ensuredNonNullValue: AnyRef =
+     if (currentFieldValue != null) currentFieldValue
+     else {
+       val rawFieldClass = typeToClass(resolvedFieldType).getOrElse(field.getType)
+       val defaultValue = defaultValueForResolvedType(
+         targetType = rawFieldClass,
+         defaultValueText = "",
+         remainingDepth = remainingDepth,
+         typeEnvAtParent = typeEnvAtField
+       )
+       if (defaultValue != null) trySet(owningInstance, declaringClass, field, defaultValue)
+       defaultValue
+     }
+
+   if (ensuredNonNullValue == null) return null
+
+   val runtimeValueClass = ensuredNonNullValue.getClass
+   val elementTypeOpt =
+     elementTypeOfResolved(resolvedFieldType).map(et => resolveType(et, typeEnvAtField))
+
+   def makeElementValue(): AnyRef = {
+     val elementType = elementTypeOpt.getOrElse(classOf[String])
+     defaultValueForResolvedType(elementType, "", remainingDepth - 1, typeEnvAtField)
+   }
+
+   if (isJavaList(runtimeValueClass)) {
+     val javaList = ensuredNonNullValue.asInstanceOf[util.List[AnyRef]]
+     if (javaList.isEmpty) {
+       val elementValue = makeElementValue()
+       // Immutable or fixed-size lists reject add(); install a mutable copy instead of aborting.
+       if (elementValue != null && Try(javaList.add(elementValue)).isFailure) {
+         val replacement = new util.ArrayList[AnyRef]()
+         replacement.add(elementValue)
+         trySet(owningInstance, declaringClass, field, replacement)
+       }
+     }
+   } else if (isScalaIterable(runtimeValueClass)) {
+     val scalaIterable = ensuredNonNullValue.asInstanceOf[scala.collection.Iterable[Any]]
+     if (scalaIterable.isEmpty) {
+       val elementValue = makeElementValue()
+       if (elementValue != null)
+         trySet(owningInstance, declaringClass, field, List(elementValue).asInstanceOf[AnyRef])
+     }
+   } else if (runtimeValueClass.isArray && runtimeValueClass.getComponentType == classOf[String]) {
+     val stringArray = ensuredNonNullValue.asInstanceOf[scala.Array[String]]
+     if (stringArray.isEmpty)
+       trySet(owningInstance, declaringClass, field, scala.Array("x").asInstanceOf[AnyRef])
+   }
+
+   Try(field.get(owningInstance)).toOption.orNull
+ }
+
+ private def defaultValueForResolvedType(
+ targetType: Type,
+ defaultValueText: String,
+ remainingDepth: Int,
+ typeEnvAtParent: TypeEnv
+ ): AnyRef = {
+ val trimmedDefaultValueText = Option(defaultValueText).getOrElse("").trim
+ val resolvedTargetType = resolveType(targetType, typeEnvAtParent)
+
+ resolvedTargetType match {
+ case rawClass: Class[_] =>
+ if (rawClass == classOf[String]) {
+ if (trimmedDefaultValueText.nonEmpty) trimmedDefaultValueText else "x" // "x" stands in when no default text is given
+ } else if (rawClass == java.lang.Boolean.TYPE || rawClass == classOf[java.lang.Boolean]) {
+ val booleanValue = trimmedDefaultValueText.toLowerCase match {
+ case "true" => true
+ case "false" => false
+ case _ => false // any other text counts as false
+ }
+ java.lang.Boolean.valueOf(booleanValue)
+ } else if (rawClass == java.lang.Integer.TYPE || rawClass == classOf[java.lang.Integer]) {
+ java.lang.Integer.valueOf(Try(trimmedDefaultValueText.toInt).getOrElse(1)) // 1 when the text is not an Int
+ } else if (rawClass == java.lang.Long.TYPE || rawClass == classOf[java.lang.Long]) {
+ java.lang.Long.valueOf(Try(trimmedDefaultValueText.toLong).getOrElse(1L))
+ } else if (rawClass == java.lang.Double.TYPE || rawClass == classOf[java.lang.Double]) {
+ java.lang.Double.valueOf(Try(trimmedDefaultValueText.toDouble).getOrElse(1.0d))
+ } else if (rawClass == java.lang.Float.TYPE || rawClass == classOf[java.lang.Float]) {
+ java.lang.Float.valueOf(Try(trimmedDefaultValueText.toFloat).getOrElse(1.0f))
+ } else if (rawClass.isEnum) {
+ chooseEnumConstant(rawClass, trimmedDefaultValueText)
+ } else if (isJavaList(rawClass)) {
+ new util.ArrayList[AnyRef]()
+ } else if (isScalaIterable(rawClass)) {
+ List.empty[Any]
+ } else if (rawClass.isArray && rawClass.getComponentType == classOf[String]) {
+ scala.Array.empty[String]
+ } else if (classOf[scala.Option[_]].isAssignableFrom(rawClass)) {
+ None
+ } else if (
+ !rawClass.isInterface && !Modifier.isAbstract(rawClass.getModifiers) && remainingDepth > 0
+ ) {
+ instantiateBestEffort(rawClass).orNull // concrete class: Scala object or no-arg constructor
+ } else null
+
+ case parameterizedType: ParameterizedType =>
+ val rawClass = typeToClass(parameterizedType.getRawType).orNull
+ if (rawClass == null) return null
+
+ if (rawClass.isEnum) {
+ chooseEnumConstant(rawClass, trimmedDefaultValueText)
+ } else if (isJavaList(rawClass)) {
+ new util.ArrayList[AnyRef]()
+ } else if (isScalaIterable(rawClass)) {
+ List.empty[Any]
+ } else if (classOf[scala.Option[_]].isAssignableFrom(rawClass)) {
+ None
+ } else if (
+ !rawClass.isInterface && !Modifier.isAbstract(rawClass.getModifiers) && remainingDepth > 0
+ ) {
+ val instanceOpt = instantiateBestEffort(rawClass)
+ instanceOpt.foreach { newInstance =>
+ val newInstanceTypeEnv =
+ envForParameterizedInstance(
+ rawClass,
+ parameterizedType.getActualTypeArguments,
+ typeEnvAtParent
+ )
+ envByObj.put(newInstance, newInstanceTypeEnv) // recorded so envFor(newInstance) sees these type arguments
+ }
+ instanceOpt.orNull
+ } else null
+
+ case _ =>
+ null // type variables, wildcards and generic arrays that did not resolve to a class
+ }
+ }
+
+ /**
+  * Writes `newValue` into `field` of `owningInstance`, stopping at the first strategy that works:
+  * 1) reflective write to the field itself
+  * 2) Scala setter `<name>_$eq` declared on `declaringClass`
+  * 3) JavaBean setter `set<Name>` declared on `declaringClass`
+  *
+  * Every failure is swallowed; the method never reports whether a strategy succeeded.
+  *
+  * @param owningInstance object whose member is written
+  * @param declaringClass class whose declared methods are searched for setters
+  * @param field field to write
+  * @param newValue value to store
+  */
+ private def trySet(
+     owningInstance: AnyRef,
+     declaringClass: Class[_],
+     field: Field,
+     newValue: AnyRef
+ ): Unit = {
+   // Invokes the first declared one-argument method named `setterName`; true only when such a
+   // method exists and the call did not throw.
+   def invokeSetter(setterName: String): Boolean =
+     Try {
+       val candidate = declaringClass.getDeclaredMethods
+         .find(method => method.getName == setterName && method.getParameterCount == 1)
+       candidate.foreach { method =>
+         method.setAccessible(true)
+         method.invoke(owningInstance, newValue.asInstanceOf[Object])
+       }
+       candidate.isDefined
+     }.getOrElse(false)
+
+   val wroteField = Try {
+     field.setAccessible(true)
+     field.set(owningInstance, newValue)
+   }.isSuccess
+
+   if (!wroteField) {
+     val scalaSetterName = field.getName + "_$eq"
+     val javaBeanSetterName = "set" + upperFirst(field.getName)
+     // the JavaBean setter is attempted only when the Scala setter was missing or threw
+     if (!invokeSetter(scalaSetterName)) {
+       invokeSetter(javaBeanSetterName)
+       ()
+     }
+   }
+ }
+
+ // ------------------------------------------------------------
+ // Raw Invalid String Detection (generic-aware)
+ // ------------------------------------------------------------
+
+ /**
+  * Replaces string values in @JsonProperty fields (and string containers) with the rawInvalidText marker.
+  *
+  * A member is reported as changed only when the marker is verifiably in place afterwards: the field is
+  * read back after every write attempt, because trySet itself swallows all failures.
+  *
+  * Handling per declared (resolved) field type:
+  *  - String: the field is overwritten with the marker
+  *  - java.util.List / Scala Iterable / Array of String: made to contain only the marker
+  *  - anything else: descended into, looking for nested @JsonProperty members
+  *
+  * @param rootDescriptor object graph root to inject into
+  * @param rawInvalidTextMarker marker stored in every string-typed member that is reached
+  * @param recursionDepthLimit maximum nesting depth to descend into
+  * @return the members that now hold the marker and the members that could not be changed
+  */
+ private def rawInvalidTextJsonPropertyStringsDeep(
+     rootDescriptor: AnyRef,
+     rawInvalidTextMarker: String,
+     recursionDepthLimit: Int
+ ): RawInvalidTextResult = {
+   val changedMembers = mutable.ArrayBuffer.empty[String]
+   val failedMembers = mutable.ArrayBuffer.empty[String]
+   // Visited objects are tracked by identity. System.identityHashCode values are not guaranteed to be
+   // unique, so remembering only the Int hash could skip a distinct object whose hash collides.
+   val visitedObjects =
+     util.Collections.newSetFromMap(new util.IdentityHashMap[AnyRef, java.lang.Boolean]())
+
+   def rawInvalidTextRecursively(currentObject: AnyRef, remainingDepth: Int): Unit = {
+     if (currentObject == null || remainingDepth < 0) return
+     if (!visitedObjects.add(currentObject)) return // add() returns false for an already visited object
+
+     val currentTypeEnv = envFor(currentObject)
+
+     walkHierarchy(currentObject.getClass) { declaringClassInHierarchy =>
+       declaringClassInHierarchy.getDeclaredFields.foreach { declaredField =>
+         if (!shouldSkipField(declaredField)) {
+           val jsonPropertyOpt =
+             jsonPropertyForFieldOrAccessors(declaringClassInHierarchy, declaredField)
+           jsonPropertyOpt.foreach { jsonPropertyAnn =>
+             declaredField.setAccessible(true)
+             val jsonPropertyName =
+               effectiveJsonPropName(jsonPropertyAnn, fallback = declaredField.getName)
+             val memberName =
+               s"${declaringClassInHierarchy.getSimpleName}.${declaredField.getName}"
+             val jsonSuffix = s"""(@JsonProperty("$jsonPropertyName"))"""
+
+             val resolvedFieldType = resolveType(declaredField.getGenericType, currentTypeEnv)
+             val rawFieldClass = typeToClass(resolvedFieldType).getOrElse(declaredField.getType)
+             val currentFieldValue = Try(declaredField.get(currentObject)).toOption.orNull
+
+             // Writes `injectedValue` and confirms it by reading the field back.
+             def injectAndVerify(injectedValue: AnyRef): Boolean = {
+               trySet(currentObject, declaringClassInHierarchy, declaredField, injectedValue)
+               Try(declaredField.get(currentObject)).toOption.exists(_ == injectedValue)
+             }
+
+             // Files the member under changed (with `changedLabel`) or failed.
+             def record(injected: Boolean, changedLabel: String): Unit = {
+               if (injected) changedMembers += changedLabel
+               else failedMembers += memberName
+               ()
+             }
+
+             // True when the resolved element type of the field erases to String.
+             def hasStringElements: Boolean =
+               elementTypeOfResolved(resolvedFieldType)
+                 .map(et => resolveType(et, currentTypeEnv))
+                 .flatMap(typeToClass)
+                 .contains(classOf[String])
+
+             if (rawFieldClass == classOf[String]) {
+               record(injectAndVerify(rawInvalidTextMarker), memberName + jsonSuffix)
+
+             } else if (isJavaList(rawFieldClass)) {
+               val existingList = currentFieldValue.asInstanceOf[util.List[AnyRef]]
+               if (hasStringElements) {
+                 // Mutate the existing list in place when possible; a missing or immutable list is
+                 // replaced by a fresh mutable list that holds only the marker.
+                 val mutatedInPlace = existingList != null && Try {
+                   existingList.clear()
+                   existingList.add(rawInvalidTextMarker)
+                 }.isSuccess
+                 val injected = mutatedInPlace || {
+                   val freshList = new util.ArrayList[AnyRef]()
+                   freshList.add(rawInvalidTextMarker)
+                   injectAndVerify(freshList)
+                 }
+                 record(injected, s"$memberName[0]$jsonSuffix")
+               } else if (existingList == null) {
+                 // A null list of non-string elements becomes an empty list; nothing to descend into.
+                 val emptyList = new util.ArrayList[AnyRef]()
+                 trySet(currentObject, declaringClassInHierarchy, declaredField, emptyList)
+               } else {
+                 existingList.asScala.foreach(elementObj =>
+                   rawInvalidTextRecursively(elementObj, remainingDepth - 1)
+                 )
+               }
+
+             } else if (isScalaIterable(rawFieldClass)) {
+               if (hasStringElements) {
+                 record(injectAndVerify(List(rawInvalidTextMarker)), s"$memberName[0]$jsonSuffix")
+               } else {
+                 recurseIntoValue(currentFieldValue, remainingDepth - 1, rawInvalidTextRecursively)
+               }
+
+             } else if (
+               rawFieldClass.isArray && rawFieldClass.getComponentType == classOf[String]
+             ) {
+               record(
+                 injectAndVerify(scala.Array(rawInvalidTextMarker)),
+                 s"$memberName[0]$jsonSuffix"
+               )
+
+             } else {
+               recurseIntoValue(currentFieldValue, remainingDepth - 1, rawInvalidTextRecursively)
+             }
+           }
+         }
+       }
+     }
+   }
+
+   rawInvalidTextRecursively(rootDescriptor, recursionDepthLimit)
+   RawInvalidTextResult(changedMembers.distinct.toSeq, failedMembers.distinct.toSeq)
+ }
+
+ // ------------------------------------------------------------
+ // Reflection utilities
+ // ------------------------------------------------------------
+
+ /** Applies `visitFn` to `startingClass` and then to each superclass, stopping before java.lang.Object. */
+ private def walkHierarchy(startingClass: Class[_])(visitFn: Class[_] => Unit): Unit =
+   // The iterator is lazy: getSuperclass is only asked of classes that passed the takeWhile test,
+   // so a null `startingClass` or a null superclass ends the walk without a dereference.
+   Iterator
+     .iterate[Class[_]](startingClass)(_.getSuperclass)
+     .takeWhile(cls => cls != null && cls != classOf[Object])
+     .foreach(visitFn)
+
+ /** True for fields the checker leaves alone: static, synthetic, or compiler-generated (`$` in the name). */
+ private def shouldSkipField(field: Field): Boolean =
+   Modifier.isStatic(field.getModifiers) || field.isSynthetic ||
+     field.getName.contains("$")
+
+ private def upperFirst(text: String): String =
+   text.headOption.fold(text)(first => s"${first.toUpper}${text.substring(1)}")
+
+ /**
+ * Finds a @JsonProperty annotation either on:
+ * - The field itself, or
+ * - A getter/setter method that corresponds to the field name (Scala/Java styles).
+ */
+ private def jsonPropertyForFieldOrAccessors(
+     declaringClass: Class[_],
+     field: Field
+ ): Option[JsonProperty] = {
+   val fieldName = field.getName
+   val capitalized = upperFirst(fieldName)
+   // (method name, parameter count) pairs in lookup priority: getter styles first, then setters.
+   val accessorCandidates = Seq(
+     fieldName -> 0,
+     ("get" + capitalized) -> 0,
+     ("is" + capitalized) -> 0,
+     (fieldName + "_$eq") -> 1,
+     ("set" + capitalized) -> 1
+   )
+   // Looked up lazily: only needed when the field itself carries no annotation.
+   lazy val declaredMethods = declaringClass.getDeclaredMethods
+   // Annotation of the first non-synthetic declared method with that name and parameter count.
+   def annotationOn(methodName: String, expectedParamCount: Int): Option[JsonProperty] =
+     declaredMethods
+       .find(m =>
+         m.getName == methodName && !m.isSynthetic && m.getParameterCount == expectedParamCount
+       )
+       .flatMap(m => Option(m.getAnnotation(classOf[JsonProperty])))
+
+   Option(field.getAnnotation(classOf[JsonProperty])).orElse(
+     accessorCandidates.iterator
+       .flatMap { case (name, paramCount) => annotationOn(name, paramCount) }
+       .nextOption()
+   )
+ }
+
+ private def effectiveJsonPropName(jsonPropertyAnn: JsonProperty, fallback: String): String =
+   // The annotation value wins when it is present and non-blank; otherwise the caller-supplied
+   // fallback is returned.
+   Option(jsonPropertyAnn.value()).map(_.trim).filter(_.nonEmpty).getOrElse(fallback)
+
+ private def isJavaList(clazz: Class[_]): Boolean = // true when `clazz` is java.util.List or assignable to it
+ classOf[util.List[_]].isAssignableFrom(clazz)
+
+ private def isScalaIterable(clazz: Class[_]): Boolean =
+   // scala.collection.Seq is itself an Iterable, so this single supertype test also covers it
+   classOf[scala.collection.Iterable[_]].isAssignableFrom(clazz)
+
+ private def chooseEnumConstant(enumClass: Class[_], desiredValue: String): AnyRef = {
+   // getEnumConstants returns null for classes that are not enums
+   val enumConstants = enumClass.getEnumConstants.asInstanceOf[scala.Array[AnyRef]]
+   val wanted = Option(desiredValue).getOrElse("").trim.toLowerCase
+
+   // Lower-cased labels a constant answers to: toString, Enum.name and, when the constant has a
+   // public getName() method, that method's result. Unavailable labels are the empty string.
+   def labelsOf(constant: AnyRef): Seq[String] = {
+     val declaredName = Try(constant.asInstanceOf[Enum[_]].name()).toOption.getOrElse("")
+     val shownAs = constant.toString.toLowerCase
+     val reflectedName = Try {
+       val getter = constant.getClass.getMethod("getName")
+       getter.setAccessible(true)
+       getter.invoke(constant).toString
+     }.toOption.getOrElse("")
+     Seq(shownAs, declaredName.toLowerCase, reflectedName.toLowerCase)
+   }
+
+   // No constants: null. No preference: first constant. Otherwise the first constant with a
+   // matching label, falling back to the first constant when nothing matches.
+   if (enumConstants == null || enumConstants.isEmpty) null
+   else if (wanted.isEmpty) enumConstants.head
+   else enumConstants.find(labelsOf(_).contains(wanted)).getOrElse(enumConstants.head)
+ }
+
+ /**
+ * Best-effort instantiation for arbitrary classes:
+ * - Scala object (MODULE$), else
+ * - No-arg constructor.
+ */
+ private def instantiateBestEffort(clazz: Class[_]): Option[AnyRef] = {
+   // A Scala object is reached through its static MODULE$ field (public lookup first, then declared).
+   def moduleField: Option[Field] =
+     Try(clazz.getField("MODULE$")).orElse(Try(clazz.getDeclaredField("MODULE$"))).toOption
+   def viaModule: Option[AnyRef] =
+     moduleField.flatMap(f => Try { f.setAccessible(true); f.get(null) }.toOption)
+   def viaNoArgConstructor: Option[AnyRef] =
+     Try {
+       val ctor = clazz.getDeclaredConstructor()
+       ctor.setAccessible(true)
+       ctor.newInstance().asInstanceOf[AnyRef]
+     }.toOption
+
+   viaModule.orElse(viaNoArgConstructor)
+ }
+
+ /**
+ * Recurses into:
+ * - Java Lists
+ * - Scala Iterables
+ * - Arrays
+ * - Arbitrary non-leaf objects (excludes primitives, boxed primitives, String, enums, and core java/scala packages)
+ */
+ private def recurseIntoValue(
+     value: AnyRef,
+     remainingDepth: Int,
+     visitFn: (AnyRef, Int) => Unit
+ ): Unit = {
+   if (value == null || remainingDepth < 0) return
+
+   // Hands every non-null element of a container to `visitFn`; null elements are skipped.
+   def visitElements(elements: Iterator[Any]): Unit =
+     elements.foreach {
+       case elementRef: AnyRef => visitFn(elementRef, remainingDepth)
+       case _ => ()
+     }
+
+   value match {
+     case javaList: util.List[_] =>
+       visitElements(javaList.asScala.iterator)
+
+     case scalaIterable: scala.collection.Iterable[_] =>
+       visitElements(scalaIterable.iterator)
+
+     case arrayValue: scala.Array[_] =>
+       visitElements(arrayValue.iterator)
+
+     case otherValue =>
+       val runtimeClassName = otherValue.getClass.getName
+       // Class.isEnum is false for enum constants declared with a body (their runtime class is an
+       // anonymous subclass of the enum), so the value itself is tested against java.lang.Enum.
+       val isLeafValue =
+         otherValue.isInstanceOf[String] ||
+           otherValue.isInstanceOf[java.lang.Number] ||
+           otherValue.isInstanceOf[java.lang.Boolean] ||
+           otherValue.isInstanceOf[java.lang.Enum[_]] ||
+           runtimeClassName.startsWith("java.") ||
+           runtimeClassName.startsWith("javax.") ||
+           runtimeClassName.startsWith("scala.")
+
+       // Only non-leaf objects are handed to the visitor.
+       if (!isLeafValue) visitFn(otherValue, remainingDepth)
+   }
+ }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonClassgraphScanner.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonClassgraphScanner.scala
new file mode 100644
index 00000000000..4a0ca124f95
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonClassgraphScanner.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import io.github.classgraph.ClassGraph
+
+import java.lang.reflect.Modifier
+import scala.jdk.CollectionConverters._
+
+private[amber] object PythonClassgraphScanner {
+
+  /**
+    * Lists the concrete (non-interface, non-abstract) classes found under `acceptPackages`
+    * that implement `base` when it is an interface, or that subclass it otherwise.
+    */
+  def scanCandidates(
+      base: Class[_],
+      acceptPackages: Seq[String],
+      classLoader: ClassLoader
+  ): Seq[Class[_]] = {
+    val classGraph =
+      new ClassGraph().overrideClassLoaders(classLoader).enableClassInfo()
+    // Restrict the scan to the requested packages, one call per package.
+    for (packageName <- acceptPackages) classGraph.acceptPackages(packageName)
+
+    val scan = classGraph.scan()
+    try {
+      val matching =
+        if (!base.isInterface) scan.getSubclasses(base.getName)
+        else scan.getClassesImplementing(base.getName)
+
+      // Load and materialise the classes before the scan result is closed in `finally`.
+      val loaded = matching.loadClasses().asScala.toSeq
+      loaded.filter(c => !c.isInterface && !Modifier.isAbstract(c.getModifiers))
+    } finally {
+      scan.close()
+    }
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonConsoleCapture.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonConsoleCapture.scala
new file mode 100644
index 00000000000..2c4d49fa693
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonConsoleCapture.scala
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.apache.texera.amber.pybuilder.PythonReflectionUtils.Captured
+
+import java.io.{ByteArrayOutputStream, PrintStream}
+import java.nio.charset.StandardCharsets
+
+private[amber] object PythonConsoleCapture {
+
+  /** Runs `thunk` with Console.out/err redirected and returns its value with the captured text. */
+  def captureOutErr[A](thunk: => A): Captured[A] = {
+    // Encode and decode with the same charset; a bare PrintStream would encode with the
+    // platform default and garble non-ASCII output when that default is not UTF-8.
+    val utf8 = StandardCharsets.UTF_8.name()
+    val outBuffer = new ByteArrayOutputStream()
+    val errBuffer = new ByteArrayOutputStream()
+    val outStream = new PrintStream(outBuffer, true, utf8)
+    val errStream = new PrintStream(errBuffer, true, utf8)
+
+    val value = Console.withOut(outStream) { Console.withErr(errStream) { thunk } }
+    outStream.flush()
+    errStream.flush()
+    Captured(value, outBuffer.toString(utf8), errBuffer.toString(utf8))
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonRawTextReportRenderer.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonRawTextReportRenderer.scala
new file mode 100644
index 00000000000..a88be97bc96
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonRawTextReportRenderer.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.apache.texera.amber.pybuilder.PythonReflectionTextUtils.indent
+import org.apache.texera.amber.pybuilder.PythonReflectionUtils.Finding
+
+private[amber] object PythonRawTextReportRenderer {
+
+  // Finding kinds with a dedicated section title, in the order they are reported.
+  private val KnownSections: Seq[(String, String)] = Seq(
+    "instantiate" -> "Instantiation failures",
+    "injection-failure" -> "Injection failed",
+    "exception" -> "generatePythonCode exceptions",
+    "raw-invalid-text-leak" -> "Raw invalid text leaked into generated Python",
+    "py-compile" -> "py_compile failures",
+    "stdout" -> "Unexpected stdout",
+    "stderr" -> "Unexpected stderr"
+  )
+
+  /** Renders a summary line, then one section per finding kind with entries sorted by class. */
+  def render(findings: Seq[Finding], total: Int): String = {
+    val grouped = findings.groupBy(_.kind)
+    // Kinds without a dedicated title get a section named after the kind itself, so that
+    // nothing counted in the summary line is silently missing from the report body.
+    val extraSections = (grouped.keySet -- KnownSections.map(_._1)).toSeq.sorted.map(k => k -> k)
+    val body = (KnownSections ++ extraSections).map {
+      case (kind, title) =>
+        val items = grouped.getOrElse(kind, Nil).sortBy(_.clazz)
+        val lines = items.map(f => s"- ${f.clazz}\n${indent(f.message.trim, 4)}\n").mkString
+        if (items.isEmpty) "" else s"\n== $title (${items.size}) ==\n$lines"
+    }
+    val header = s"${findings.size} finding(s) across $total descriptor(s)\n"
+    s"PythonRawTextReportRendererTest failures: $header${body.mkString}"
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionTextUtils.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionTextUtils.scala
new file mode 100644
index 00000000000..528d254ce53
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionTextUtils.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import scala.collection.mutable
+
+private[amber] object PythonReflectionTextUtils {
+  /** Prefixes every line of `string` with `times` spaces and rejoins the lines with "\n". */
+  def indent(string: String, times: Int): String = {
+    val pad = " " * times
+    string.linesIterator.map(pad + _).mkString("\n")
+  }
+
+  /** Class name and message, then up to five stack frames from org.apache.texera classes. */
+  def formatThrowable(throwable: Throwable): String = {
+    val message = Option(throwable.getMessage).getOrElse("No message")
+    val trace = throwable.getStackTrace.filter(_.getClassName.startsWith("org.apache.texera"))
+    s"${throwable.getClass.getName}: $message\n${trace.take(5).mkString("\n")}"
+  }
+
+  /** Keeps at most `maxLines` lines; "..." is appended only when also cut at `maxChars`. */
+  def truncateBlock(string: String, maxLines: Int, maxChars: Int): String = {
+    val combined = string.linesIterator.take(maxLines).mkString("\n")
+    if (combined.length > maxChars) combined.take(maxChars) + "..." else combined
+  }
+
+  /** Counts non-overlapping occurrences of `needle`; an empty needle counts as zero. */
+  def countOccurrences(targetHay: String, needle: String): Int =
+    if (needle.isEmpty) 0 else targetHay.split(java.util.regex.Pattern.quote(needle), -1).length - 1
+
+  /** Snippets of `needle` +/- `radius` chars, at most `maxContexts`; none for an empty needle. */
+  def extractContexts(
+      string: String,
+      needle: String,
+      radius: Int,
+      maxContexts: Int
+  ): Seq[String] = {
+    val found = mutable.ArrayBuffer.empty[String]
+    var idx = if (needle.isEmpty) -1 else string.indexOf(needle)
+    while (idx != -1 && found.size < maxContexts) {
+      val end = math.min(string.length, idx + needle.length + radius)
+      found += string.substring(math.max(0, idx - radius), end)
+      idx = string.indexOf(needle, idx + 1)
+    }
+    found.toSeq
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionUtils.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionUtils.scala
new file mode 100644
index 00000000000..6c04fd61224
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/pybuilder/PythonReflectionUtils.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.pybuilder
+
+import org.apache.texera.amber.operator.PythonOperatorDescriptor
+
+import java.lang.reflect.{Type, TypeVariable}
+
+object PythonReflectionUtils {
+  final case class Finding(clazz: String, kind: String, message: String)
+  final case class Captured[A](value: A, out: String, err: String)
+  final case class RawInvalidTextResult(changed: Seq[String], failed: Seq[String])
+
+  // Substitution environment: maps a type variable to the type that replaces it.
+  type TypeEnv = Map[TypeVariable[_], Type]
+
+  /** Scans for non-abstract, non-interface candidates of `base` under acceptPackages. */
+  def scanCandidates(
+      base: Class[_],
+      acceptPackages: Seq[String],
+      classLoader: ClassLoader
+  ): Seq[Class[_]] =
+    PythonClassgraphScanner
+      .scanCandidates(base = base, acceptPackages = acceptPackages, classLoader = classLoader)
+
+  /** Runs the instantiate -> fill -> inject -> execute -> leak check pipeline for `clazz`. */
+  def checkDescriptor(
+      clazz: Class[_ <: PythonOperatorDescriptor],
+      rawInvalidText: String,
+      maxDepth: Int
+  ): Seq[Finding] =
+    new DescriptorChecker(rawInvalidText, maxDepth).check(clazz)
+
+  /** Same pipeline as checkDescriptor, also returning the generated Python when available. */
+  def checkDescriptorWithCode(
+      clazz: Class[_ <: PythonOperatorDescriptor],
+      rawInvalidText: String,
+      maxDepth: Int
+  ): DescriptorChecker.CheckResult =
+    new DescriptorChecker(rawInvalidText, maxDepth).checkWithCode(clazz)
+
+  /** Renders the findings as a text report through PythonRawTextReportRenderer. */
+  def renderReport(findings: Seq[Finding], total: Int): String =
+    PythonRawTextReportRenderer.render(findings = findings, total = total)
+
+  /** Evaluates `thunk` through PythonConsoleCapture and returns what it captured. */
+  def captureOutErr[A](thunk: => A): Captured[A] = PythonConsoleCapture.captureOutErr(thunk)
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/util/PythonCodeRawInvalidTextSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/util/PythonCodeRawInvalidTextSpec.scala
new file mode 100644
index 00000000000..122b1dbae8b
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/util/PythonCodeRawInvalidTextSpec.scala
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.util
+
+import com.typesafe.config.ConfigFactory
+import org.apache.texera.amber.operator.PythonOperatorDescriptor
+import org.apache.texera.amber.pybuilder.PythonReflectionTextUtils.truncateBlock
+import org.apache.texera.amber.pybuilder.PythonReflectionUtils
+import org.scalatest.funsuite.AnyFunSuite
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.util.concurrent
+import java.util.concurrent.TimeUnit
+import scala.util.Try
+
+/**
+ * Regression tests for validation pipeline used for PythonOperatorDescriptor codegen.
+ *
+ * What this suite checks:
+ * 1) Code generation must not leak raw invalid text from @JsonProperty string values into the emitted Python.
+ * 2) The emitted Python should pass a basic `py_compile` sanity check under an isolated interpreter.
+ *
+ * Notes:
+ * - "RawInvalid" is a marker chosen to be very unlikely to appear in real code.
+ * - We only scan under AcceptPackages to keep the suite fast and avoid pulling in unrelated classes.
+ */
+final class PythonCodeRawInvalidTextSpec extends AnyFunSuite {
+
+ // Scala literal "\\!." is the 3-char string: \!.
+ private val RawInvalid: String = "\\!."
+ private val MaxDepth: Int = 3
+ private val AcceptPackages: Seq[String] = Seq("org.apache.texera.amber.operator")
+
+ /**
+ * Runs `python -m py_compile` on the provided source, using an isolated interpreter invocation.
+ *
+ * Isolation flags:
+ * - -I : isolate (ignore user site-packages / env)
+ * - -S : don't import site
+ * - -B : don't write .pyc files
+ *
+ * @return Right(()) on success, Left(errorMessage) on failure (including timeout).
+ */
+ private def pyCompile(pythonExecutable: String, pythonSource: String): Either[String, Unit] = {
+   // The source is written to a throwaway .py file that is deleted again in `finally`.
+   val scriptPath = Files.createTempFile("texera_py_compile_", ".py")
+
+   // Maps a finished process to the result: exit code 0 is success, anything else becomes
+   // a failure message carrying the clipped, combined stdout/stderr text.
+   def resultOf(finished: Process): Either[String, Unit] = {
+     val rawOutput =
+       Try(new String(finished.getInputStream.readAllBytes(), StandardCharsets.UTF_8))
+     val output = rawOutput.getOrElse("").trim
+
+     finished.exitValue() match {
+       case 0 => Right(())
+       case exitCode =>
+         val shown =
+           if (output.isEmpty) "(no output)"
+           else truncateBlock(output, maxLines = 40, maxChars = 8000)
+         Left(s"py_compile failed (exit=$exitCode)\nOutput:\n$shown")
+     }
+   }
+
+   try {
+     Files.write(scriptPath, pythonSource.getBytes(StandardCharsets.UTF_8))
+
+     val command =
+       Seq(pythonExecutable, "-I", "-S", "-B", "-m", "py_compile", scriptPath.toString)
+     // stderr is folded into stdout so one stream holds everything worth reporting.
+     val builder = new ProcessBuilder(command: _*).redirectErrorStream(true)
+
+     Try(builder.start()) match {
+       case scala.util.Failure(thrown) =>
+         val detail = Option(thrown.getMessage).getOrElse("")
+         Left(
+           s"Could not start python executable '$pythonExecutable': ${thrown.getClass.getName}: $detail"
+         )
+       case scala.util.Success(process) =>
+         if (process.waitFor(30, concurrent.TimeUnit.SECONDS)) resultOf(process)
+         else {
+           // Still running after the deadline: kill it and report the timeout.
+           process.destroyForcibly()
+           Left("py_compile timed out after 30s (process was killed)")
+         }
+     }
+   } finally {
+     // Best-effort cleanup; a failed delete must not replace the compile result.
+     Try(Files.deleteIfExists(scriptPath))
+     ()
+   }
+ }
+
+ /**
+ * Loads the Python executable path from configuration, with fallbacks.
+ *
+ * Lookup strategy:
+ * 1) Try parsing udf.conf from resources and resolving it.
+ * 2) Fall back to ConfigFactory.load().
+ * 3) Read python.path, trim, and ensure it's non-empty.
+ * 4) If missing or invalid, fall back to "python3", then "python", then "py"
+ * (validated by running --version).
+ */
+ private def loadPythonExeFromUdfConf(): Option[String] = {
+
+   // Reads a non-blank python.path from `config`, or None when it is absent or unreadable.
+   def pythonPathIn(config: => com.typesafe.config.Config): Option[String] =
+     Try(config.getConfig("python").getString("path")).toOption.map(_.trim).filter(_.nonEmpty)
+
+   // parseResources yields an empty config (not an error) when udf.conf is missing, so the
+   // fallback must trigger on a missing python.path rather than on a parse failure.
+   def fromConfig: Option[String] =
+     pythonPathIn(ConfigFactory.parseResources("udf.conf").resolve())
+       .orElse(pythonPathIn(ConfigFactory.load()))
+
+   // An executable qualifies when `exe --version` exits with code 0 within five seconds.
+   def isRunnable(exe: String): Boolean = {
+     val pTry = Try(new ProcessBuilder(exe, "--version").redirectErrorStream(true).start())
+     pTry.toOption.exists { p =>
+       val finished = p.waitFor(5, TimeUnit.SECONDS)
+       if (!finished) { p.destroyForcibly(); false }
+       else p.exitValue() == 0
+     }
+   }
+
+   // The configured path is tried first, then the conventional launcher names.
+   val candidates = fromConfig.toList ++ List("python3", "python", "py")
+
+   candidates.distinct.find(isRunnable)
+ }
+
+ test(
+   "PythonOperatorDescriptor.generatePythonCode should not contain raw invalid JsonProperty Strings"
+ ) {
+   // Every concrete descriptor under AcceptPackages, in a stable (name-sorted) order.
+   val descriptorClasses =
+     PythonReflectionUtils
+       .scanCandidates(
+         classOf[PythonOperatorDescriptor],
+         AcceptPackages,
+         Thread.currentThread().getContextClassLoader
+       )
+       .map(_.asInstanceOf[Class[_ <: PythonOperatorDescriptor]])
+       .sortBy(_.getName)
+
+   // An empty scan points at broken test wiring rather than at clean descriptors.
+   if (descriptorClasses.isEmpty) {
+     fail(
+       s"No implementations of ${classOf[PythonOperatorDescriptor].getName} were found. " +
+         s"Check acceptPackages() / test classpath / module wiring."
+     )
+   }
+
+   val total = descriptorClasses.size
+   var passed = 0
+
+   val findingsPerClass = descriptorClasses.zipWithIndex.map {
+     case (descriptorClass, index) =>
+       val checked = index + 1
+       val findings =
+         PythonReflectionUtils.checkDescriptor(descriptorClass, RawInvalid, MaxDepth)
+
+       if (findings.isEmpty) {
+         passed += 1
+         println(
+           s"[raw-invalid OK $passed/$total | checked $checked/$total] ${descriptorClass.getName}"
+         )
+       }
+
+       findings
+   }
+   val allFindings = findingsPerClass.flatten
+
+   println(s"[raw-invalid SUMMARY] ok=$passed/$total")
+
+   // All findings are reported together so one run lists every offending descriptor.
+   if (allFindings.nonEmpty) {
+     fail(PythonReflectionUtils.renderReport(allFindings, total = total))
+   }
+ }
+
+ test("PythonOperatorDescriptor.generatePythonCode should py_compile under isolated Python") {
+   // None here means no candidate could be run, including the python3/python/py fallbacks.
+   val pythonExecutable = loadPythonExeFromUdfConf().getOrElse {
+     fail(
+       "No runnable Python interpreter found: tried python.path from udf.conf " +
+         "(or application.conf), then python3, python and py. " +
+         "Configure python.path or install Python to enable this test."
+     )
+   }
+   val classLoader = Thread.currentThread().getContextClassLoader
+
+   val descriptorCandidates =
+     PythonReflectionUtils
+       .scanCandidates(
+         base = classOf[PythonOperatorDescriptor],
+         acceptPackages = AcceptPackages,
+         classLoader = classLoader
+       )
+       .map(_.asInstanceOf[Class[_ <: PythonOperatorDescriptor]])
+       .sortBy(_.getName)
+
+   if (descriptorCandidates.isEmpty) {
+     fail(
+       s"No implementations of ${classOf[PythonOperatorDescriptor].getName} were found. " +
+         s"Check acceptPackages() / test classpath / module wiring."
+     )
+   }
+
+   val total = descriptorCandidates.size
+   var ok = 0
+   var checked = 0
+
+   val allFindings = descriptorCandidates.flatMap { descriptorClass =>
+     checked += 1
+     val checkResult =
+       PythonReflectionUtils.checkDescriptorWithCode(
+         descriptorClass,
+         rawInvalidText = RawInvalid,
+         maxDepth = MaxDepth
+       )
+
+     // py_compile runs only when code was generated; a failure becomes a "py-compile" finding.
+     val pyCompileFindings = checkResult.code.toSeq.flatMap { generatedCode =>
+       pyCompile(pythonExecutable, generatedCode) match {
+         case Left(errorMessage) =>
+           Seq(PythonReflectionUtils.Finding(descriptorClass.getName, "py-compile", errorMessage))
+         case Right(()) => Nil
+       }
+     }
+
+     val findings = checkResult.findings ++ pyCompileFindings
+     if (findings.isEmpty && checkResult.code.nonEmpty) {
+       ok += 1
+       println(s"[py-compile OK $ok/$total | checked $checked/$total] ${descriptorClass.getName}")
+     }
+
+     findings
+   }
+
+   println(s"[py-compile SUMMARY] ok=$ok/$total")
+
+   if (allFindings.nonEmpty) {
+     fail(PythonReflectionUtils.renderReport(allFindings, total = total))
+   }
+ }
+
+}
From edf19880fc9243675625ee2b58545219800eb2fb Mon Sep 17 00:00:00 2001
From: Elliot <36275109+Falcons-Royale@users.noreply.github.com>
Date: Fri, 6 Feb 2026 13:09:26 -0800
Subject: [PATCH 4/6] added ternary contour op
---
.../texera/amber/operator/LogicalOp.scala | 2 +
.../ternaryContour/TernaryContourOpDesc.scala | 147 ++++++++++++++++++
.../assets/operator_images/TernaryContour.png | Bin 0 -> 6374 bytes
3 files changed, 149 insertions(+)
create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
create mode 100644 frontend/src/assets/operator_images/TernaryContour.png
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
index eb319a82d1d..caf1540de03 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
@@ -129,6 +129,7 @@ import org.apache.texera.amber.operator.visualization.sankeyDiagram.SankeyDiagra
import org.apache.texera.amber.operator.visualization.scatter3DChart.Scatter3dChartOpDesc
import org.apache.texera.amber.operator.visualization.scatterplot.ScatterplotOpDesc
import org.apache.texera.amber.operator.visualization.tablesChart.TablesPlotOpDesc
+import org.apache.texera.amber.operator.visualization.ternaryContour.TernaryContourOpDesc
import org.apache.texera.amber.operator.visualization.ternaryPlot.TernaryPlotOpDesc
import org.apache.texera.amber.operator.visualization.timeSeriesplot.TimeSeriesOpDesc
import org.apache.texera.amber.operator.visualization.treeplot.TreePlotOpDesc
@@ -242,6 +243,7 @@ trait StateTransferFunc
new Type(value = classOf[TablesPlotOpDesc], name = "TablesPlot"),
new Type(value = classOf[ContinuousErrorBandsOpDesc], name = "ContinuousErrorBands"),
new Type(value = classOf[FigureFactoryTableOpDesc], name = "FigureFactoryTable"),
+ new Type(value = classOf[TernaryContourOpDesc], name = "TernaryContour"),
new Type(value = classOf[TernaryPlotOpDesc], name = "TernaryPlot"),
new Type(value = classOf[DendrogramOpDesc], name = "Dendrogram"),
new Type(value = classOf[NestedTableOpDesc], name = "NestedTable"),
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
new file mode 100644
index 00000000000..2e9bde676aa
--- /dev/null
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.visualization.ternaryContour
+
+import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
+import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
+import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
+import org.apache.texera.amber.operator.PythonOperatorDescriptor
+import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
+import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+
+/**
+ * Visualization Operator for Ternary Plots.
+ *
+ * This operator uses three data fields to construct a ternary plot.
+ * The points can optionally be color coded using a data field.
+ */
+
+class TernaryContourOpDesc extends PythonOperatorDescriptor {
+
+ // Add annotations for the first variable
+ @JsonProperty(value = "firstVariable", required = true)
+ @JsonSchemaTitle("Variable 1")
+ @JsonPropertyDescription("First variable data field")
+ @AutofillAttributeName var firstVariable: String = ""
+
+ // Add annotations for the second variable
+ @JsonProperty(value = "secondVariable", required = true)
+ @JsonSchemaTitle("Variable 2")
+ @JsonPropertyDescription("Second variable data field")
+ @AutofillAttributeName var secondVariable: String = ""
+
+ // Add annotations for the third variable
+ @JsonProperty(value = "thirdVariable", required = true)
+ @JsonSchemaTitle("Variable 3")
+ @JsonPropertyDescription("Third variable data field")
+ @AutofillAttributeName var thirdVariable: String = ""
+
+ // Add annotations for the fourth variable
+ @JsonProperty(value = "fourthVariable", required = true)
+ @JsonSchemaTitle("Variable 4")
+ @JsonPropertyDescription("Fourth variable data field")
+ @AutofillAttributeName var fourthVariable: String = ""
+
+ // OperatorInfo instance describing ternary plot
+ override def operatorInfo: OperatorInfo =
+ OperatorInfo(
+ userFriendlyName = "Ternary Contour",
+ operatorDescription = "A ternary contour plot shows how a measured value changes across all mixtures of three components that always sum to a constant (usually 100%).",
+ operatorGroupName = OperatorGroupConstants.VISUALIZATION_SCIENTIFIC_GROUP,
+ inputPorts = List(InputPort()),
+ outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
+ )
+
+ override def getOutputSchemas(
+ inputSchemas: Map[PortIdentity, Schema]
+ ): Map[PortIdentity, Schema] = {
+ val outputSchema = Schema()
+ .add("html-content", AttributeType.STRING)
+ Map(operatorInfo.outputPorts.head.id -> outputSchema)
+ Map(operatorInfo.outputPorts.head.id -> outputSchema)
+ }
+
+ /** Returns a Python string that drops any tuples with missing values */
+ def manipulateTable(): String = {
+ // Check for any empty data field names
+ assert(firstVariable.nonEmpty && secondVariable.nonEmpty && thirdVariable.nonEmpty)
+ s"""
+ | # Remove any tuples that contain missing values
+ | table.dropna(subset=['$firstVariable', '$secondVariable', '$thirdVariable', '$fourthVariable'], inplace = True)
+ |
+ | #Remove rows where any of the first three variables are negative
+ | table = table[(table[['$firstVariable', '$secondVariable', '$thirdVariable']] >= 0).all(axis=1)]
+ |
+ | #Remove zero-sum rows
+ | s = table['$firstVariable'] + table['$secondVariable'] + table['$thirdVariable']
+ | table = table[s > 0]
+ |""".stripMargin
+ }
+
+ /** Returns a Python string that creates the ternary contour plot figure */
+ def createPlotlyFigure(): String = {
+ s"""
+ | A = table['$firstVariable'].to_numpy()
+ | B = table['$secondVariable'].to_numpy()
+ | C = table['$thirdVariable'].to_numpy()
+ | Z = table['$fourthVariable'].to_numpy()
+ | fig = ff.create_ternary_contour(np.array([A,B,C]), Z, pole_labels=['$firstVariable', '$secondVariable', '$thirdVariable'], interp_mode='cartesian')
+ |""".stripMargin
+ }
+
+ /** Returns a Python string that yields the html content of the ternary contour plot */
+ override def generatePythonCode(): String = {
+ val finalCode =
+ s"""
+ |from pytexera import *
+ |
+ |import plotly.express as px
+ |import plotly.io
+ |import plotly.figure_factory as ff
+ |import numpy as np
+ |
+ |class ProcessTableOperator(UDFTableOperator):
+ |
+ | # Generate custom error message as html string
+ | def render_error(self, error_msg):
+ | return '''TernaryContour is not available.
+ | Reasons are: {}
+ | '''.format(error_msg)
+ |
+ | @overrides
+ | def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
+ | if table.empty:
+ | yield {'html-content': self.render_error("Input table is empty.")}
+ | return
+ | ${manipulateTable()}
+ | if table.empty:
+ | yield {'html-content': self.render_error("No valid rows left (every row has at least 1 missing value).")}
+ | return
+ | ${createPlotlyFigure()}
+ | # Convert fig to html content
+ | html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
+ | yield {'html-content':html}
+ |""".stripMargin
+ finalCode
+ }
+
+}
diff --git a/frontend/src/assets/operator_images/TernaryContour.png b/frontend/src/assets/operator_images/TernaryContour.png
new file mode 100644
index 0000000000000000000000000000000000000000..ba0e8c3ec23d6dc2260b628e5aee5b672bbd2bf8
GIT binary patch
literal 6374
zcmb7J^;Z+<@5dr-#PcrJ#)_7x%Y>;&vWNYj2>8xn&JTk9v&XGhPsNuogMm5$w=zD%|cdyOu<(meIc
zK0H-Wje+C9CE~J&HoPi4Of{_zFM2bG7@kTW70%y4ic1)R8;iW{vF=Gx?2@-aj5hn{
z#a})+dsv`zA?!?b@T%}8Fxv?$0~8|(V1atxQllQKvIeu93iW7O`R$LmB*!j27kmqyNJt&QV#4oV
zK)3!FR8*j8sv8Z-4I|_nP*Z`CxfG+ikj6c8GL{>rbDPHSo>>E_+peoLgL)~
z@xv&Z!t~>?AaJQ`lv_#etbcECxSuNLSSVTsEV`&I57JSfIq`wUUdw=;F>!K`I?cAa
zv4~89>qkIaue{NDynw6_-Hr)yTooQ=1U}2jj*O0%?3vZIfhv@S=!Sm#d?p^}m#5-N
z&>1eTel;|^dZvd!Qyj3>dwqn0z`_pJv{O9Z+qY+mw~{{FXfJI(u5s|fN~fe6`z*6;
zHU4sTDhGVJ=lMxQ8xx94O|IKxTOZ$jOrLh}GK^-BBQ0~QdPE)bw~)e9ZyYCcofk1g
z`t=!eCVnnc;q(?2^l=Y2FGuFudT$^q>M%aicUP%5-84u?MEyxwf%pU9ziE;`JpDe1
zu~;Zo*W;eJ9++*LV9(NrLFg*TVtiNw7C5;HX7zXMf1T5TP^q3CRr
z`K1l+`Seg1-{Ud(Y1#Az-q}@ZFd&Y{lQdf
z;@ur9r!?HH?W;Llfr{GQBi{W*b$37O>$X1M#aKvApwIjTUf{kKI3p6=Gj29C5bfj8
zR=Z#Rx@z$Q^zAg|%!m^YA3;^anA8F=7H8*r|BsQno%$fTHzf}5aa9reBa9to$2(^|
zLwZizfabOr|9TfG0u|+0YY(YDq}NLvR1@Cx;-
zykj0GG3&RVn>-HAosRoj>Ks+NJP#3A^>-I}U3v7laqU-b@h1RzZqm%~lW=-XR!gdx
zz$}~m?z|Llrn5p|N9p35l}SYtof|wVU8TWup46ur=s1X!;12N(NBQyPJzqarm_BL*
zJ{Tak@_p#^7A&DyB~dWD%nQ(v2UZ@@CH(ZGAxpFTs1onXhPP=&n-S$Q{GoA7{(~^I
z`Z$&1Ac5~Ru5lD&pzX;s0Dg-R@vtb4{5tQ>rO_V7*eH
zIse*8)U!8P6@8pM6l>I;l!%cp!8~}7y5}0#RnoL!1iPh*RZ!>geEU12h?oB*xt
zCT>stHLrR}*D8n|9fbSYpVoAJ7d09-+JN)@mp-DNbkN^r*pF}|T_TldK|!M;)fedo
z<9KgvP|grky3~>G-ADdp?1-!fo0g>1le!&MEbrGNQ!=_Hj$vagy_YY}A2OF4RqQSvf`zdl2+7tH^$sB=Hx-kuB3zdB2v$J%1
zzkHhS3$-n!T`C
zxxJz~pJ9r6G4AHT5^cpxwYGf&M6{fmq&w03bu_$4Eal2nIk}Bv`hDxHa%Dt9cuUcA9Fv8War+^*
zg5IWul24AX(H);{B|Iy(1E#ERK0f)I+C22LWq>T@3A)nz{!d}wMCztfsl)yBX$S4W
z(CtBfh$BBH#^I;L@^(Q3CO2W4=2s}GMf`QBJt<(3y*?{$)7A#dkhBGyUi4L&*eQ^bHGH*43Ccn)9b#d
zS;|eK^^Z$HDwM8mO0y-RjGdO_4K?_4XA&8&E4`rPQ*&qsMSHF#
zc!q$=a5CsSMjALi-O!qQPIp0i27TUqhd0J3q5%BsqO;`M54)}@JY}4`w1YP+WogqvaEz
z_#0!MC>NT>r>+}sELAQuUN>v^E}Pb#cl&CNKR+RDV>-kRMY=)ccZZxYiXQf>SExBp
zm%>km+%$usuuR9r^Jt`hlX4@;MZk}sl7Tx;XusoxJDB!jhIRXyDvipma`ZO$j=%SZ
zI@^=i>(a=U{M@RqE5nTRhfV~dX#d8uoubGt2;QCa4!}k{K%48^_#FM+6NYhyW8~rw
zt_UyaW86k74YJU-KL?F=};_$
z>{7A7!0?O@b0sG&osj=V`+Mk{PsiLG{D2$0y5TzdFIVI!rYZZelBp*`n%WiZiFJSFlZ+*~2m
zmZPSZf{pA8qN7~%>*d$8Sp?4k0zR+cbAJS=loFN9xVlDbc+~gfZio)x_i*DmdYivx
zj_4S0L%84$2cmuE?wD)OrYs|k`K0u`!53ZCYT|O5e6Dr)?UVY-t%|jj2~*>~FNvHopG{
zWgdO4ccxx+ErLV{))I}stoH3leq+ebRkoTV8eCea#Wea^f6j$xH(qIizmYM>v*WJM
z>c)X#UGyQo3SUc_GHp59h?
ztGiBJ_DYkGby2k4l&ztFGXYao8h(e-^jevr3r|by{4XJjeT_xoM?_-ZC
z%QxmxqA$CqY-kdKZv0j9|7`1noxi;O&;p-c2=b>A?d!f&D_hKbMz=~7HcK=9qomsG
z0ox12rz)~R@@exA1x1I^YoaCQMx|x%h{sv8VEM^@_&asS%>oDdT0~=fu8%gT5+s?S
zESKB|EX&Mpq0is#(}`@K+qq_L8Ntp=QYBgo(FfI3Yuz;`%|xJ#*@wK3e4+Wd5=aq|
zZrHWfEkz^C)O~^D=L~YHYta!w_VS$0S~UIL^FETh$N8#Zx(_>ynnz*~7Uzu;
zc)!CG;A~8gV9ArwH%2kTNgMs2?}IR5v*SR8r?{HC-RFR3v$+LD?oQ%)>n#p@bbwR=
zWsmY`!SR?Hgbd{sUqQYXnupxO?Bjai`ujF^6j{T!#%@+;d+|edWKhErT&QQuup=^b
zSO7D3V!IA_xWYbEs4fdEg(Rfs#S9sHKtEPIg-vccYILX9vVWTG{!rt=%unJ|@3H?K
zxO>RdD|l}=(+GRCN!B2E#NP6>Cfey*#TU273eG>pjNLuDbm#l{ZZxj~dO4
zHffKz=b3`zLPhGKUX{&HD3KlzmR|FBT$?<IHBj#6HU1Hm`7agR+NbnxIqevFBi#2L{J&r;ew
z_K~@Ik5#9;){2}>zPo2zpYP7Z{Vf#i9YmHJ3Xz~S
zVJaE>6d^yY9XFS3o&Y_5Zf$q`zVX+`^@*VrX*c1%K?(-0X?+y+Rq2vmp#^q4MoJo3
zDIr9d|HGPeu^egYybu4(y42Yj$zS8~feS_B5Eo9FMD9zI;_+6fl_uadN&8#K$!n83
zEtaJ;N}dy;vP)K(!}`efw~;q;=T5%C)lEk~=OnUJQ!`92-@k1R6Q_#dtzG*3XsFJL
zzmb^X*{Mzcq76wSbxx_|WSvuI*|*qo#C2mGFKb7uZF+a*kyQQ_1|ZIU#m8=0Xkj8w
z|MgS-28L6eza^6?!><)4%ZlLNTLcnAcXzni+K!5;aIQiv-)YE}V)l@R$f!vZiKy9e
zo~8(Su{SRQ-c+1VGM$hea=7Mpgx+4WG$vu2^5ZM+-eJv0fRtN3?eAa#DMs=mCgtNl
zLlTn^2rCXpbtUrj$_AY#EKd=){sQ$a#cQ2X{AxbSlMGo|tt4^dOd5fpe*1;0s2vz|
zFT3)jL$aP4X}QKI`&Y`35L4Y6rWU#M(yh5Wo~oG6w&886$R%ltU}Z*uz=knTuAqrG
zSaIURiDcZ)bc4r_Oo|yH%4$VtG}pZG9&v>HN1NLfm7faqbE`@)=^|r3=Pe;>5(aPq
z)R{&$EmEi6=g;Y6AXEOg!b4evGQLxwE(dv9K}nh}ci6zPbKS!o>Z288IL-Bs6wrwn
zXp&!%5=ZxRMYZs+qLVeKRbseSH7^bR?1tlYt$N5njaRT@W;Spy1(aHUe*0HguTFT<
zTxZR-S-yDYx3Qpqw}CsO2bBsm)_@PbsBzE*Y0x5AA
z?K%P06E%U@;0@+p(96>!K&;t*s6ydu{X;T|El*5bcc)@kqpFz=mP6ms=cHz~NYg}Q
zDU#7>qh@F&Sii#11ikg#>aWcG3@#q>E!V`Dp0NzAPnsK^jSB=;#`xjBp4E~<06`40=spIqrJi}Y|{2s`Eb
zbI$Lr_Z)B`7LQSLbJ{r{uhV4hwWsT8&xn4?iu$;}d8Wj;Iv7yX$jR_R!_2r^$+crS
zGj?{T@qSHq4jU^38zF&>;{VW|e+4O0L7D~PW6N3L#@
zBZI5sx{(Dv*6AZe)wVJY7nJ@nL$yD-!XhmK^GJ=YpeDJ`PBpygfPi5W70sO6x+
z%Z$O1oslY=!g6!gmi-*$3qZpPPX6$`v#}{lTO3n*skOj?J?0VGfH!X%#7y5cr)cD4ij%K}Yi)JC
z!DiRTH%}%=ZtlNvPYccq2$e7rw!Gby1}`Gsn}4M;n@8lj9_wri7KTNCB6cEc@7Saq
zBD&F1987vMqPpc*oyXZa#hhSIc0h>u=g4ptoH+^pzyS3J_scU{D7t*>xzCC{<7(0A
zsLhmYux#71RY>}3@XYd3blujEw4X`K=kqW?1US+49{Z=sTuft3mCv@rvyOX7W>Ck6
zvOUs?IfWV#2#BR%P~Gg>&S5FKLL^(O}eW
zGOw;!n7QTEZZ%iiwYh+gF+&teTZYc8txfQ>0q`@VqN@WOo
zvnc~wwU-HV$^gdn4QC>20HxOWsJ@z8Hh{n2v%`xy?U|G(kFl>dtaE&ZU0AS+wT~Sg
zk?2zO%2mDivR^~zKZZ{IRE6vPJN4VqLgdSK*ap++SxBWfh<%80K
zTnvzMf{sE#GvJfpZG~>*;C5V_Yt~zFOQn^jstjsQ}i4`$Jogcf}@Nn>PgGOnCMKTb2g<9P%
zGe5ZxT~M4@U^FytEaVyoU9Zou14DFd^RqIZ3a?u_&73+$97WMNR^w#7M$M;Gg0<9k
z#Hi#1%sKRhY7SJr9*Km(wFse$$p)N!MQhi89hL%9Ar8lk>vJvgTLUtf>3QT6ea+aBM!m1Ey=jceWK(JEKKZ{^ZDB>Z`E;7*Gf`>Y*VqTud(Rv|1q0!y^cHJckz
z?x*`AAP?F0%hL~~!<|n@nFyKM)b`%!>!x#H=275eaYdT(?gZid>Rf*c;%xpIWCBN(
zKf4XZ7r7(z<#<;u7mx?zx+zX57=cbT`i$9B^!*_vUGhR1@eB7rRtMv3mg!oAL)6weeUp-B_P(+th40!
z7Aw|H2Z-lQwH3qy#KpD1FTN?nHEv_nR2>t!Ibq-ywg|UzeIaRs(H7tNJCcPTDwz0w
z{WfVr40IJT<2?o2Jv{32CA10sMj~Ep4$CT9hb~we2a%IzX+4M&Vj`?}wDUXPqAP}q
zu<;88C4cAC%sdnzA2m%0EoXyYSicWo1WKBu*SifmeXy{bd+hwkM~tCbRI;hMMpP90
zFy?qb>dUXE%rOehwg5l&GJzYI%B}AD#caIMnwEOfF8ZndoLB71YuFCB)}rX`ki5^(
z+44Y%x=E~xiz`k~EMdC;JB)sXt4FjyE_z+Yq=DctsEg^4?d`m#trKUfKNr2ODw2cm
r0K8TI;Ah*Lgn~^O_}c#<*+*fqEgee_bV&dE(ZJJq30A39wu<~eMRADb
literal 0
HcmV?d00001
From 85bbe022d6717f9fa18d6d82254ffd7b950cf569 Mon Sep 17 00:00:00 2001
From: Elliot <36275109+Falcons-Royale@users.noreply.github.com>
Date: Fri, 6 Feb 2026 13:50:35 -0800
Subject: [PATCH 5/6] scala format fix
---
.../visualization/ternaryContour/TernaryContourOpDesc.scala | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
index 2e9bde676aa..1b5b39a293f 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
@@ -65,7 +65,8 @@ class TernaryContourOpDesc extends PythonOperatorDescriptor {
override def operatorInfo: OperatorInfo =
OperatorInfo(
userFriendlyName = "Ternary Contour",
- operatorDescription = "A ternary contour plot shows how a measured value changes across all mixtures of three components that always sum to a constant (usually 100%).",
+ operatorDescription =
+ "A ternary contour plot shows how a measured value changes across all mixtures of three components that always sum to a constant (usually 100%).",
operatorGroupName = OperatorGroupConstants.VISUALIZATION_SCIENTIFIC_GROUP,
inputPorts = List(InputPort()),
outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
From 71ae194bb98bd9ed41cc9eeaf8d26d38dff1d5af Mon Sep 17 00:00:00 2001
From: Elliot <36275109+Falcons-Royale@users.noreply.github.com>
Date: Mon, 9 Feb 2026 14:22:47 -0800
Subject: [PATCH 6/6] reconfigured ternary contour op to most recent PR merge
---
.../ternaryContour/TernaryContourOpDesc.scala | 53 ++++++++++--------
.../assets/operator_images/TernaryContour.png | Bin 6374 -> 167675 bytes
2 files changed, 29 insertions(+), 24 deletions(-)
diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
index 1b5b39a293f..37ed50bc7d8 100644
--- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
+++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/ternaryContour/TernaryContourOpDesc.scala
@@ -23,10 +23,13 @@ import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
import org.apache.texera.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
import org.apache.texera.amber.operator.PythonOperatorDescriptor
import org.apache.texera.amber.operator.metadata.annotations.AutofillAttributeName
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
/**
* Visualization Operator for Ternary Plots.
@@ -41,25 +44,25 @@ class TernaryContourOpDesc extends PythonOperatorDescriptor {
@JsonProperty(value = "firstVariable", required = true)
@JsonSchemaTitle("Variable 1")
@JsonPropertyDescription("First variable data field")
- @AutofillAttributeName var firstVariable: String = ""
+ @AutofillAttributeName var firstVariable: EncodableString = ""
// Add annotations for the second variable
@JsonProperty(value = "secondVariable", required = true)
@JsonSchemaTitle("Variable 2")
@JsonPropertyDescription("Second variable data field")
- @AutofillAttributeName var secondVariable: String = ""
+ @AutofillAttributeName var secondVariable: EncodableString = ""
// Add annotations for the third variable
@JsonProperty(value = "thirdVariable", required = true)
@JsonSchemaTitle("Variable 3")
@JsonPropertyDescription("Third variable data field")
- @AutofillAttributeName var thirdVariable: String = ""
+ @AutofillAttributeName var thirdVariable: EncodableString = ""
// Add annotations for the fourth variable
@JsonProperty(value = "fourthVariable", required = true)
- @JsonSchemaTitle("Variable 4")
- @JsonPropertyDescription("Fourth variable data field")
- @AutofillAttributeName var fourthVariable: String = ""
+ @JsonSchemaTitle("Measured Value")
+ @JsonPropertyDescription("Measured value data field")
+ @AutofillAttributeName var fourthVariable: EncodableString = ""
// OperatorInfo instance describing ternary plot
override def operatorInfo: OperatorInfo =
@@ -82,37 +85,39 @@ class TernaryContourOpDesc extends PythonOperatorDescriptor {
}
/** Returns a Python string that drops any tuples with missing values */
- def manipulateTable(): String = {
+ def manipulateTable(): PythonTemplateBuilder = {
// Check for any empty data field names
- assert(firstVariable.nonEmpty && secondVariable.nonEmpty && thirdVariable.nonEmpty)
- s"""
+ assert(
+ firstVariable.nonEmpty && secondVariable.nonEmpty && thirdVariable.nonEmpty && fourthVariable.nonEmpty
+ )
+ pyb"""
| # Remove any tuples that contain missing values
- | table.dropna(subset=['$firstVariable', '$secondVariable', '$thirdVariable', '$fourthVariable'], inplace = True)
+ | table.dropna(subset=[$firstVariable, $secondVariable, $thirdVariable, $fourthVariable], inplace = True)
|
| #Remove rows where any of the first three variables are negative
- | table = table[(table[['$firstVariable', '$secondVariable', '$thirdVariable']] >= 0).all(axis=1)]
+ | table = table[(table[[$firstVariable, $secondVariable, $thirdVariable]] >= 0).all(axis=1)]
|
| #Remove zero-sum rows
- | s = table['$firstVariable'] + table['$secondVariable'] + table['$thirdVariable']
+ | s = table[$firstVariable] + table[$secondVariable] + table[$thirdVariable]
| table = table[s > 0]
- |""".stripMargin
+ |"""
}
/** Returns a Python string that creates the ternary contour plot figure */
- def createPlotlyFigure(): String = {
- s"""
- | A = table['$firstVariable'].to_numpy()
- | B = table['$secondVariable'].to_numpy()
- | C = table['$thirdVariable'].to_numpy()
- | Z = table['$fourthVariable'].to_numpy()
- | fig = ff.create_ternary_contour(np.array([A,B,C]), Z, pole_labels=['$firstVariable', '$secondVariable', '$thirdVariable'], interp_mode='cartesian')
- |""".stripMargin
+ def createPlotlyFigure(): PythonTemplateBuilder = {
+ pyb"""
+ | A = table[$firstVariable].to_numpy()
+ | B = table[$secondVariable].to_numpy()
+ | C = table[$thirdVariable].to_numpy()
+ | Z = table[$fourthVariable].to_numpy()
+ | fig = ff.create_ternary_contour(np.array([A,B,C]), Z, pole_labels=[$firstVariable, $secondVariable, $thirdVariable], interp_mode='cartesian')
+ |"""
}
/** Returns a Python string that yields the html content of the ternary contour plot */
override def generatePythonCode(): String = {
val finalCode =
- s"""
+ pyb"""
|from pytexera import *
|
|import plotly.express as px
@@ -141,8 +146,8 @@ class TernaryContourOpDesc extends PythonOperatorDescriptor {
| # Convert fig to html content
| html = plotly.io.to_html(fig, include_plotlyjs = 'cdn', auto_play = False)
| yield {'html-content':html}
- |""".stripMargin
- finalCode
+ |"""
+ finalCode.encode
}
}
diff --git a/frontend/src/assets/operator_images/TernaryContour.png b/frontend/src/assets/operator_images/TernaryContour.png
index ba0e8c3ec23d6dc2260b628e5aee5b672bbd2bf8..6526cb3bbdf3c3b9cf7402ff6f512b5a32b939ef 100644
GIT binary patch
literal 167675
zcmeEt_d8r&*zV{AVTc~V5F#Nm1kp=I4G|@J@4Yj6O&Gm1x+oDPdhc}*qDHSlh~AOUcrQczfc60h1R|D`g{go*n8Eiid|cogBYRLZ
z@CU<1MMe@-HvD)C_<(I8p(p_YRmKqB7~ue)2^?j$TtJ|Q-S;nyK8J6nAkbZb985yZ
zLw|P>?(^smCGN_3#_YfMmR~-V{ri-g!DoHG8Y8D&tAzzEutX|}q$I^AsZr^Wy^RzaeQJo`r>r$hexHw9{Y
zUyMGm178+XPQ3c|-?a!UYBk_WVsYW8&{&>mwzx|X}|oF
zp(ra1mLR+9?>K~1QZ0u=NOi6BDF(d{71iDA?5eagb@a{6TCw6F+F#`nPei;aY3G;t
z{tod_JrrDv%qCwYPX;}E1-V`1{G2qF^DR-(!xESR`Wzw7$Y}hMgG1H>oy4!Q>~V6s
z{?SH5#&%?WiMC@5BzWP|hTBu~f+*faW2TPSM1d*5PN5}7YA5+lXs3pUYT0$3^YV0O
za*#5v7M4gTE+(F&f+qs_NYPzl2U0xfkjlV+t$C+TXq=K$>ovAGE%>d|8dA8tK99u$SFHiG1zKm63m89FBqJhkKf
z!lS1zU<&`fg=UwYboj#g$ok`kgdd4p1ip%lowlsAnL|uW+S<;aVIkottzfp6yZvT=
zX=zv)Rh%v4IcSAzS>$m;{j`j#e35_Xe@iQAC`WB_y~fqHJNNaV^+a0`zmM!&1R@_(
z*7aR*`d$l}F&W6y*i^BvW|&5m6@8{EDd`h}KSgHb{U>xPwNJhXE9L$^5+hn$Qa%vY
z?JmMHS+wDT&SF?m3iTFA&SxEUEfvC~S3Hr>7ea-F=!22vy9*IPL9Zr!b@DIRj6xR=
zDJBeo*}eMvkVU>IID6tbwcCjuoc|WMiC*fZDL?)c$;bel2e}jROvnv3R*UKhy%%MB&m~3Br9_J^>Eby&9f>=LCU9=R4kURzclVlVMY<_Su6!&Rw
z?|_Imz31PB$VN1BOV>o9aHRz^iFy_<+c
z4x3H!sR6s5{NJwkZarbO{kfEOv4TaC|2CEt#qvXch}NAoM?5z#m!-hy?OHX54t8C0
zgX@ZzQV%sG!fG;{LQYBSRH3O?y$LD}q>t@@!vA)zX(!rywTjRF%_mOLN8-ANdmowV
zFU$LkM2^}GSdESy{E>8$F9w
z+;22OGW3#EtlLLtdw|^tX--twwaE4z`@36>drr|$0U0MAZt}~DrvOX996K+m_B^(d
zQuts}((9PdFh5{+oL#@Wtdw~Y;bFKQ>=p@C#9+bRr*-`%LW(g&X*(~yS@W;md?f9X
zBVb@CgD|II^(}TNtRV5q;@&&y)tsGONVet*b_Q&w%eL@x?TzQz?K6s>StY%NF+%={
zR`Iu89$oiBs6^c+e1i^7-yt+xQ
zB)XS4fa=#ZpHn+x`Y)*J%_){93O<{uQv~@e`<>V=@|I}8nO6UO=zH)0yK+o!z;a80;MD#mAxIOzTs{Mp^c6shV6#$D-Hf#LrpdJ?V=s=3FS@i61
zO%FT1LFhb*IN;0UkL(3!B=>O>D+|$qVA1s$cxpA6!ntRz)h1nMClk>@rKcXbWdMOl&QYGew!
zZC_-2Y!6_9XYIArUxN2$==aZd%W9E!CJJNp^-F1Rg*y{ILem#V`;Yyw<&6`$)hGm{@w{gtSos;
zZxp;P&1Mgph1CpXis)_;2|~z7Z(uE_-|Lq$5@KUxnWUW&E#+dCwpgIB@;0=!7i#xX
zk8=YTCEIW^yR$_q&tALypZQh5uz}E1**)Bxb9!v{ZeCN=;4x#?wd%m4a=u24e#7?m
zyJ<{hhXOgMl(6hOKKWZ1-)p<^e>-t`(tgB-SmI0tZy?7>k5W)sPQy7pr#n+@P}PO3
z#rdr4c%Tq`^WPrDLvdlBmT#|kj^@kTt{CJ;&2^pRIiw`go2TFXu7jIdR!Z5Q3DB
zs@-nE`(QXSIxGvLvu-7=xM2-1Ju>_7h$iAr-v^h6pORoWesV*;AaRcC-hm#zLCsEa
zdi_fn;k(|tW|#vj>p)pQ!g7OcUNA!}XDXokjR$4AnA{*!%zj{qAJhrcUrY*zEDRne
z5HN=Jd&4Uc0RauGxn1t(QI9>T1Ii%d@&VK_#F6(KabH`|f5@2a{#0u-J?Xu2+k6FV
zg8}51i}rX6Go*!k!;FO$%ixZh6FibC@O#h
z#m1m$dGX_2`QyH!K8buI$4sF5n)vVepFuG3566)jWDybX#+l;HBjsGZqf6*t{4HZg
zEXn-)A++_6d~>!A%?4J)s-W(OUb!ND6am%oX*)C{5xbfpf($)f6)rcy!-%DRi!Ik;
zcJbY+M5An566Pk=b9~&AsABCucAp>&v9)n5hBAKUW%~#ZSCvwtWC=mg&RPlr?9NvP
z)cPJry}Y0(=2a(H4QqqD!z&(`=}#xD7_{Tx*b@|u$?XkX4|g%IeY9*}`yFf#JBKLsiJ17;gU5uOjhW>^(A
zO+`_uy$4C`J=&
z+hA+a-PdCa?NIsA;;QO$E$|zm;UN?7b?LymmBoXAtO!i`T
zCoI-ROC8mG;Qxw^iQTURiaMy(8c9_Mn&}FKS5-KuYURkboJb9c9z`HpbYVJjeN<0F&Ys@oo@I?tNMZ
z)zPW9t+kpNC9K1G24NLyDC33%m}$NCzg_qn#^68RQ&q|eVg1oMX;TIa()!;ZTwpSU
zTlXpd6t8g;LRJtv3r>q6ir%=}=e${px?u{KXm0sx|-W>7TqXayGw5G!%_|4PT$5
zs5O&6`VUTVqDO=dx^9j0JNF;c^rOsps&FL>)P7d(x8L2a8~~}(PYT=A%xqG?Z>8fQ
zP?*b*je$XF9%!5-5DP2Iy8qdqlpVm03d2`i;H8F6j(cL?Gy^!JXbVU`Y2RM$ln=lj
zZMdC3GEqSOIeGeBuQhG1)~>zQq%T@ZS^*5EAeRJa6!rqiwXI+cpx$`z7(rF;l$Su+`Fa~J;7>;6ME><#JFcua
zH8!gdge_rNNjQ`b+w!cV|9a&78oenoN(91I`Px3KdQhz*@A;Jz@Kp3SAMa#DKx(df
zspi;W)w`EMLo;WL_)Q?hLS6#|Tl~XZqlwq;V0l{u66w!4N(8aJOwqy7!xk@XXn51z
zR?(mBe^ZGQ(B#ds8V<&W!N5B8=F|1WrMKu9EjEZJOznZk@|`9yuP^tb`3FqIOEF%c
z3kEa{To%E-Po-q4dfjkT^knBgi&xA%Z@Ig@=Ku8hGbghlJ9rA?8GaeMo^;F|%+c^C
zgl>a=vJlP(YCoU-i3xrSI}|zl+f96T-d*k+_ALhSYhF*au6FX|9Y6{s-J^%O7J?V=>Gw@zd-L=-Run%T!K!@{P(s_H$y
zOm0J;HBXqBtZ`$k$DiNoVRoR9g8a*VE=|h^N^H3-G39Y8@}KXyCF^!_Fe#Q!y?yWE
z`2tu*Arygp+uQw;89!eN@=H-3Rn65>5yp8NhVX-QwTr4(Kww1kd^Xyz9YpPCJi9Zr
zhKKE$vl)jNimhelAO2*&Qq6p~nzy3`VhrGx1IZPteAq^C0I$LOdB&SicwO
zv%WFL1gZX=C5u{xG29Q~b(n4JH$8SmNQI>1_Dz
zTD@5FD;F#ig$6(O7X{YwemTzy&gh`H$-T;XPrPf6ia)+k4SO?2P5%3|Ius#}tqWh|
z+!%eUlQdVSCl15ePjiSQH(z}X}ganLta>oJEsKND@H26aHl+;FoVyiV6gLu1OJ(^C;
z-bqbuT@B<99z(4(VbOqRAu6%sIbV`=%O%F)%Rc1n;zR*|#pOozg23RT5MsJxm-;{J
zS+3)9QV`Y{#KW1RcmNzm6fO@ZsXSMMeF_S~%-~;d!L4H#Nv6ii5P`Rv7z-cuB-oIV
zuZoh!ISt*E?~1gPBk+4#&bD80G58;urNrP>Bui%K*=nh;O)qEV78e
zjF>$=Fdo~NH_PRBC*;Zx0@hc&)k2V!j-8lR6IY}C!EgT%2Etp#2&89~!S2<7sNZh+S!`?$i`x3*M
z$kb{wv0D!xZZKO|oF=Hv8D12G$T8{;p*}wfwFn4DrEU2{6zYeF0{FaPryiiI`Cr
z>hQ4QanSkf7zDHK4Y!cDO*er0U*d?DA}-E|KG8l%QdzpkNPEG@UxE=Ibk&d^vjr&y
zG$wGI{I=#nMEwXYfE3XIVJqaHeoPR>hr~J%Ud7N3bXvY>`h(QaX~^Lhs9n92n;}kvM$C1Trqx~CYhO>V}Y!nr)XPb3*`?Fx9n|!1S4c6@weWB`lI-x`fX-%
z^uK~lNl(<_7#rH7`L#l%l8lgem*qvc89+0jovV>S%@D=qUrR?TEZ10>2;zsm
zFZrI*4CEJ~`y}~1am2Ms2tNpMF6QpA@(Diz-1q>a%VCiEj2%!tzrW4A>!(KJCR!yoxMO-7~Nbm>Y+8Gn*2x3
zd=mSD#2}vc#8RJe2x6jXCsW0@Nab)1_u`(sSW`<4ed1Ze!pG_rVFx7pAK0-}x>w7T
z_|qWpp9sUt9En2HhmT*qdJ9W#e7ECay~~t21fqlJ
zzd2|70~7$sbpV6%oMPem-*AuBBetEC#Kd20xqm-2g%V%~u0&3Zz_e3p-0JK^jW(1(
zpK98c{NJtAy6H=1k37e$*oq64vA0}k7;p#me!&))=M#m5`}qyIoT``WE`}r0!em|V
zX%3wGVDK^EOaZ=MM?+K1L*5tZD;k9ukAenfN(+PWw^^R<`&`zhq|n68!d}yNK;U+*
z+NHWPb#^Al-QCQ5k@P}*SRE~-eJVgDH@+J0HNEI)LItwIWWjY8QI;<nJogJybU1X{@~h3X
zwv6+M12nnTW)b)h2-{h&`?
z6$u$x!eQ48Gz1^V_DCB3wRAI+&zcUxEuEh)b+U08BX$Mj1OEX%BPc6*05#f4{u$-K
z;5W}`GPDJ;e5{+7sB*tV5(^KF>vfa?>e_W!pgX>*-n0-)mMRX$#TgVZ(aHg7SUoOr
zN#|GkNgAhxt-V?HWz&jeX)Zi0(oW&*W_Y@M>?t6}RSnzVnxO2o`5lHF7
zm#P%ZUAT~|)zr-_bQhKRceaO|ePE^`>I9E@1Xzxg+{9LBJ<~W*<<9OZFekBCMNZDC
zYp**LPASU|mct<>0f=M`8pw0so3KJSOh)QEFKWmcPCHf^^aOhJ&}3*enqKIq=yit6
zW?0-u#^?BDC6V|XknoJF=>uYY+7lHj^L^WScS*oNi_h*6P;u=1bA-@A6PMfNctJP9
z6G?~r*7=yAkTTP;D}cBTN{4J5HKE$X=+`Ot`04y)WHHlp@MJ{z+D2afM>*;2V_hwJ
zV{5OS!{3`6+6{ko&Fzb;Ba&1w?ib$MG1zb>d*bU;;q%>zpW?stBIU4Fd`lSv^7C0D
zea&oNBDnGU9oZyqY+%?Z8mukb#k!pXvs=QO5p>f~tHio};pX1fbig9HM(4M6DPgtEhdx@$>%|#jl%f
z9l02p@zhkm%HDmQI6dBJ-D^imQt?Tn>gAe?!WE
zy)OzKP!y>SrT^VtJ%Q?7n)@>z1_-3G(@nl;E;NHgt{~Bwn%R)G<@kq9(X_7bM+VYU
za%dmO60EDko&DxYyZm-$D<4^&oQ*vO=O>lCp?Dt
ze3lB4X*-&;X`>j_5QpJolj0lw7X5VH<+$}HWevd`6WbLeX|?>t*GkP)jVXX0(>7RE
z6nvR!u_Zd|kC-=}cfW1%v~b;7YRz6gIADPUn)wwgeiA}n$7IuppORuXlfyoq`%7|GyU-87D^~(HWH!{`fg+`eruftLhGq&Ym4{;6JiWn
zUr==aQ*>DI3I_Z1PFXpu-TU0$v(=lS(dbaif@|Av#%O{-lORVNCJzt&<{#bc=(-k1
z61{!C(tpkMG`9?*qD*e~nPc$zE0k^P5sHpY4E=Ay#CAT*UMrbZ{C_1MlU~g_!GM{~
zII(4VXstnb&tm0BOIg2uI1^KsM~B|06bz89i6L~YFWb+~yXy$8
zg!e5Qk&kh>Kr9EdoU1tsNv-C#m{NS7MzkUnkPKQSnwxKbW;4O+xAsRvP*`Z=lBc@f
zeus!L4GF=2gv+Q7i^u2zld=yVzj;p%87q^j`z=cHX6pm!x!I~@;H=TSIbFcbu-*hZ
z?}?A)9zh?2Sa(&a0*Da9*6CpHNdaqinTrAhw9i5fvF6*Fmh+7fIh$$2d`t}G6c)|Z
z6eUFQ&Er0NV?R5+?8BRyy3CgA+4&i8h|VT}#R&$)8O2es{sXR4VgZZ4Omx+J8Mry2
zLf&tNOh_NGqR3-}E9>Cd`t7tPU&+Rv!wcikU3*JWjJZwOeteY!slg+{>k(Y-!c`SH
zhuDy9xleo$O8{I*<$r0GHpUL6B+mF)_d=!R!MoPvpXx-nV$ZtP0V|#OP
zG*Hd6@d>^?Do~}5Zc>IFVyNxbkg8uh@8MXeLeO*)SNUlDYH4(tCsXd$
z`}~m^Dk+-HWxO^1^i+q>!LAT4r^5nCNA!bUO)^NbpBm&=hb5^Dk24QVeTOeliLjZN
zV}hTi5Cly=c%6SLRGi&-7$4z+jqDpb!I*oZtCO&x)f!gba%H_)jhF5F#422BF}J=9I`RqaY}QO0S&HS40t;V=}t6-5!^8dp{ZTJq#uhc6@79bSDg!b3<{e
zyQ_H&Sa&WvHu9uqt`KLNQGhhjT`nW##X9_+Y#;(NqC99w>MZ~97#?|eM1&6Lmjdj^
zE(du(3Q&6p0<>~*vWvwx>AnK0wY5}d{Ewg=mHCg0tX
z2N@tNaTu>HGIA2ajjLVic$>FSI)Y-ak2{$Wj@bAjOqOf*DVwfX(>}#aPRWmnOuk->
zlliv}w4gur6#meALILWOxufl0B#;v
zIsf@_g80hhY5{+{EsoT*r
zwmaU_m;~`(SddVV?DOi6OMXgR5D6kN&7BUx(ETHHD1+a{SGfc~PR&|tH&sZn#z5wa9);hkg?As@4VD9cvD6+(
zg6Mv5@v6>VvKQRGXt|jxPrQn8*1^d``5!w?3?%sN$$sjFaq2RFXUJ2d?X
z`M$c#9Z$eGz4dE>${Ux2A0Oo~^kaMG1i|dJx!1olI3z+~iN}Z|uJ%mg8!I)K8!|NH
z=67|p`A=UYX7~Ij|MD4gt}Amq^|z8#a_6QTEJDNXOPUQbf9ePz{%p77*k0+yeI=)s-_6j04IW*>^Q&EPBqt|S(A3rOeVfx^dHYML`L
z0nwoCU1ePYNRXLU0ayF6x`dZkXaR)#GeZ4fe*k!1QDJa}_31XB@70+A9Npwr$n7M=
z{I#t4`&|3&ASaKiva-{<1vwkv&SJBYWCZ2UDQXiwaNM=m?q$Z0lKnG7Y&Z{v5R>Sa
z_sUMn=>rL@(@%4s9q9Zd=<4cbc9Oc>z)
z^B1XHOfp(MWft5tR_k_mT?59MWFLGnQ-r~y85~viw}Rgf1x!3|_`X3N?D7U8ui0Yi
zRCwF>*!S0VQppvWGCz3Y+t5WvtfyyuR#+@GllglNj+gI+j(W(i-e>cbaPbtrOQr1M
zhT~sx9k#B_7efN?P9plJgzOpO#)@3DL*}*r{sEC1-fXM4fTeS8h&ioVQBYiZT+lL@T=4IK!s{5
zcv}D6W=hyC4r*_^_u?D}+cSA9-0z!-_VY)YU9Ps>oi(#EL}iZxi0enR
zC(ts3Zvo=0N$$YPAO@d9qfo+^Zw_<^tF8dqQ5|R-cUrwb0)4fUT&~N@`JCIBOnjN#
zNXt=C@fin)wZG+cUGVgG{fU&+c^Z@LW~Nc^i4^i=mRDPt0YF=)-zfC_75MjWTx{>1
zWLwNQE*E=TD#}N@`_I`d7~(&GB{&9ymK_{C|1O16E}>WS66~1CIW|83Q4(=C&+?_GBd0Ffw&$$RBHl
zR+lbKBuF|D@vCw5M@fv&+NlPYQG_@3e2+&qb>+L29HE4S1pX|)tDKDde8Jw$EMIr4
z>3zLLdVa%&*^##PukZgHF(~Bl?o3JhqbE1zP)L}uyrV&0Y{kS!oz)X990T8@wQQ{D
z1Ab}PEn_YwsG!*diETFDN}^k5*TorO!r=QaEN{QIb?
zUF`Xb-1eA^P%|f_S_uWjA8(cLY8s8$%=TtF`kL;r+dVW*XX4c)(P@)X
z;%hp>j8?rx9;bjtc*Y*pdpEE&RCskpc{jgB4buzJGE8p%eUh45eGQ@p<(%rf5@Q9x
zWMqEiNLX;u^KO4}_z(2;Q{o>-K}ne<6^tVTQjw{@vo!Ozc+B0J&*p5#Eq-QP
zbG7*q8bhX_i!cF~RhjHI_>UY_fAYAGu*CY!6B7JAXwWNA_OiivwQ66@4kee?LTDIS
zqz7Sj4-07kVsj)jLEwvT;aLLgkRM#P@7Z?kngs}jJ^?tzI!yfhFmkJ!_TSdLP-}io
z4OhTKFS5>F?l7rMQaKJhJ5e~IKlR0=Sq{~YH}smT0oA+``Urtft~_H?-n#Vr$BXhe
zKtU#2XIGE&f$Lj?Aa1}`+7j>i_j57L#mYq{p0K|<^L0F7mV{smwbuiXSA(?T;bvNk
zw)5Gy>wgP{L{4@m?D!m(7O=%3U=HxXmtPiI6;Z9lTGkPAgp|YSkDq{#=*Y{R&oGxx
z9-r}=(wKA%+4*1onU=u9eLSCn7>DHtUbj5=>TpKVppb5Q&F<+yq_2?!`Hm
zWftB8h#jKmA8km(!^0i7*$bbf)w0_>!{`fxD*jNQC1@K@A)h&2J<~bR^cfzlUIsK+
zKZk5UYE^q_RNn4FWXc!6+P|VmXaq}xmkqpH5Ux&CooBzIi0hll0~YH?#(E8WPxgW~
zilUaW4spQAuTJhDmV-@-`m@_M$z#d7ZqkSB_gWCS+HfL%DdYbA=GiJ$(-rs2DQ$L%F?I|zM+g&
zULs78&hC7JlS$@q=J?H?P)#};WG6Faaspt?@WA(F6lH}So#dhA|gDE~bCBzHfCq`l#7#OgXL{gPO$9_3O&aS_z;jOar?^&61hEeZ11Ajg?
z3(hv#d))~IQxNGTBPt7ifW_P=;<&t+5D9&k>Z#*srwGQVGj|ZkRqcDo@35G*sDXX7
zFQD<_)$gxbCmRzR=EL5iFJGE-2iSqatp!_v+^?6_EC7UlHZTScGaG+l=OTPT00hIr
zvDEx3P^|slh9*h)=wfZwsj_m`eo)&{tG}O^!?BqUW927&*(M{Q+Kv=Tf;pOhBT_64
zV~lNAEsVxl_PPGK3{2%9qTd5#6ZBlf{9WPju@(ek17Xe+y}F7RS+-g#-68^M4e37m
zRve9Q7|Ae{c(wjlCiJCFlg~4rLUm!>u|e<)G7tS>G)|D0+m4<8VcRv8gk{{HYIp47
zVsek=QvKTEVKjWv7YY5^3}vklilW1AeJscBmIDY*fB9zs60jcrMIW2v;-nYNnnPR0
z^I=RZ5Cneo_97R2bcKgI5=w);jyt_e(N#9})I54Iwzl$gB2Na6F{&47g~vF&<0;6g
z7271|=G=a<-fZ0ez)SaGrk$Ap4-$Yk()49|=ziU+
zcR_0P%wxquLlDo7dNumKmg_dM(u?Pk&$TY-vAb0Of%2C>65wMG-cl&+5Gc5_rt8C6KawW0y6y
z`6a5a=SRa4!*{EtmRW3YYRAxyJ6`DuDYbPD(<^cq%ZpmAY6k92Q@@E<1;4q$vvRQ8
zmF*vIs*`CF*wAyklPSxOrD2<4vc)boIkk3NZ*2syNdpme(35`}`hI5GYphY#QZPxY
zNwcpW{zdKR@BGVifr!;8+0<;EZ{=SAonbzZ*zR6SSXs%fUQ+2I)jw#C{^WC4HA(j<
zUt3s#J-E(lmZ7Y*gZERK3_)16F@g5t;*xW7=wFZ()}xzCl&mf+4TXpYS`!GgIj@?+;vftR_Sl
zvMu`|B@&JASBL3G?r})WC>$Ls$
zEMkhm|Mi#j-UVgY>V0byj|-p%(J}&y-I2;MqEoILHAQbZvKIudkyCYIMI!-aayPY5QixbP>I5+~O6*
zD*4O8Ra+`eFS8D(K0jwPFTcNWWGV5NN@7PzTA-9_{+*n@CY9d^`U!%i_$zREHXP4q
z>fk3=yRSBZnM3@?%MZq~Y3EA`3*1Ka?AcCpBe+EPnVC-(MEV511e6MX1PSwRx!_O*+5X(}&b=
zDqM^tB&aZGu@I*Xk&&_0uLpowLy4!FF`7c>FT|>8x6iiz=FYb*o0I)^sOPoHZZw)aKVNM(X}|ba%?kS^g`ElVV0a;0
z5hJ>R>~dNYo1vzsfD(#t>?gl-2%XWu4uZzh(b1(}{?RZ{1O(T;q_)RRr1j)C&%Ab~
zzW*$5+}M7N`H(B^CVg*V;Q>wzE+kc1aEW$^=r<
z^MVaiSNAc{w3@Ftq5Vmj?@Ts&_iwv-WplY2E~*lrMo_sKZ!p912cEEh#S9EW99HDU
zh+eTYe=MOSjDoh4ju~4>m^SFN>>ZGjA-@xX!Oi-MUU8?}QxSLj*9VZr0%POg17&F7
z$Yyu=Xw3IoQvoq<-ESekF{h<@(g|wW9N6*(OPVH+pAMW2Dz^{*w
zoA4@n+jf7Z?>u@$Ip4ojvx~H|by
zw(!PO>uH8Z7&P;&NAqF)3mzs{tG(}<9(*u>b)E0?c#VK4sfC^mdxc%r*(GN?U$LiR
z;XWJ!nbLqgv*_(@$%$84xmpf&uRovM?MuKi7gI;eto=U$RqBww&*-y)hp@odTGxT9
z#Ce$opIkF7wlLY8$v$RD4^7P@0NA!oR+ZN7mo7xvN9aT`Vmdq58C@Kv(*;=OAc(L>
z85_!;+s)NH|k7P2L2ZR@TqviwgQf%Z-Bqj<~^CG%?Ni(Ub
z?;2LC2J07`ufJDE+1(8cRU@7a+I%ZYYv5ExG+yuHrQiIUI2Yj<6r81=7}JygI_H8N
zFoQ(4aJ64p%Y{bo6;hkD8ExQzciVM!M|`38`%XOzuU7iIgG
zab}mLPv22ex$cc~7TV3fjewa7fhijJz41R%EUNQm;6RuR=j*J}=3FiGm!iLk-CndZ
zV?_O+_$ZNaNPBd?9I?$XmY34*macvh)h`Wku4gOSB~9`*0HnL)z@eL4te#n`p7Vhe
zgWj9tK_xezwu(n4yI%U!J0|Wg#}(=LQCK9R4a%^#N{$`6`i~XT)N3`m!`bZ>w>IT5
zB2!hRw*Ud-9IO|zZ8X`OTJRh}9Ayf*Uwwe7
zlPgOpnt3~#YtVr#eSY^+WyK|Ay5NF8*YLaa{%C7yR_d)Pg<$YS(ViNrr~G^QdDRu3
zQuIB9nhd-gRlh{4qp6uFDUJmWBYHWniXZbO@k$0@x3?MZk7<K^fQ|i7`mQ;I_5TR0i4^y5;){@Y0p>v^lFD`jQafkw$Sg4KxcG+ZpM_o2E0R
z5g?QK+l`_koV;u=XDptdkW~ecXJ;*5z@CxO?~`S}^F^zn3<5)nfSZ-va0EW``^7>=
zhI@S!^%#NpTN4G>mCc50sp8{5#i%lIOFEz<+ttMnwAS3SMye>oW$|}df)EHQt}CIf
zE1NPUsyKjN`};7D=%t1{eFxCgbc-biCnhEu_Y1sDUpL((GLOKEKy02~A@CII3In5bLuK1U}=nUaB&c!H&{rEN*U?brMBafBOOr7{X`QluEN
zA9{+Pk%mxqahOxbor-v!Aw6zCZz{1!$_gbktf|qp3GHXD6)N1KJVnVQFRRSi)o$DN#V6t|^_2V;dWjF|1yg}g(4-hSfWNx})@`nb
zVPi|GtE;c(~m_Wc8DLKt9>DvKlcCwC%?OJBC@z4)piF=uZ9$TgU8gIT;qR
z`26|vcd9t<>Z%epNad}H|Bg<`ay%=_l`@VJvq#ORY%(K4W&55FE(%6g|3vEDMGGAW
z&QactK@Np+N%qm4=8Y%A&ASZ^Gm@ZLP~GJECn6If#voueNlgGSWCCcGFOc|cUP=Ve
zqG$afMWtVX(J?*rk2ippZcsDfM_qB4Y##!fC4rFUO`VCUBaN!SO4}?ggg#&
zhyWGQ{--!ekh|m2(kp30t;co7lg0T;Vwc}=swWrEQrvM_I(Iy|RXgV${2RV{Mwk=A
zSn^o=xTjvTRR}1BrM`=TdIk>*`|foFqJbnM)wAyT+a4T7u@W1~;GOTHY&6CMlXYUX
zOeFT0Z1ef(^3V*Fi0Iz{)N`)xG2Pzg;j%A1)tr1anWaUj8Y5!_%roIw>uoz+Q
zee{S3fWr@f3Yb10fnR4g@lBc81S3B@&iwEdZaE+frfxafNH5_W^vV`L04x6-9)1eS
znGxD=YW`?sWMpF;j)#y63YPpc{6yGxXQ1#2e=qeVeaFwAVU(g92JjVz`G7irnmY4nJ>6gKl%*eN1
zq~_7XT>TEt(~EF^v)ng;xK5!YkV^#pBq#CezQ?xBxc+cli5mT@ntr-mC;k0U(f&(P
zBjC)46PDaSD?D;xs>N*k!8{*Ns+GNB423DAh2P$1wI_xxAJ`|qcCi6!RrGu2;Y&F+
z(|qCMnQj40;1JGC>-k}zEj$GB#srU!SAM`_XODkVlhVPdd8IWH&T%fi|qAir5O12szJ@$uKf;wNm@n{m~4DHVu*GSG`Sjb
zg=}m3S;U8{q4_?@qj~=GUbi5e0L
zvayy_pJH8b+~akV8r8vW#rQ>BR`jD49+SB7GkyP4zq7fvQ;s{yft-7-U*r7vo^aE_
zSYxZ#`Ql3m&i;AvAKdK=`o?)7eaGLQ>EDIRll4rLuC~g+Y_k2#|6P1lO!LDwv;)pT
zfx@l%0Kr+h~VS+;9lX=8yC)#V`m{P%6cjj6{bnJ2|1WCXqaAb
zb4hUZs{(k4)bMG*V^BfoDPEZ#u{ExE*a%hKWz@8OY9?uREYmQNz5}N{B(6st&93Ta
z$7^(R@jBxB+bF2h@b|2Tzi9;4gkAFTOn78O|BZ6M)7vr2O=Nph7qc2n2UdqoP?39*
zIk*74Y?xA8daJ)he6L!+_gPCbuJ}jD*Q@Emp7KD$K7pKLFq{Vk^U~9uV{-0tVoRSG
z5%#Rc_hZWyvv%gl5+bK8QU7jz*flXiWV#UuS+zR&-Qgi>~ZcmN8%48PmLxx6goQO*;wpaNtGe3w8>&R*3L?&Un(l^?-w@Y+h#=$=7LDzZj>cqLT3*W`&}5
z;7oZh{zL5GhEU^lXZghh^<@NtJ0Q)j@n#b{iyOfHST%MM_a9N-*KH;+j#Q5rzHSC5DU@PEO
zgtaKvy{Q0q3^8IF8?W-UUfd9hX(gYM2LMIK58$l6v_ja3*tKou6WV@kpcfrus=levh#Olc4Z@)W4sezxx1daGk`Pb^yXxJ1NIK`^22
zU2mBg_D$)UOAuBNh?BlMEt5D>D`eUE-M&$%+u$qI5YyL;m8Rf$!)e-2DC9!k_4(kN
zy_$*520|Zi;$}+mI2@r9x`%+^t1KXKHr2F!#t4?Y;6D-^LHqh!y9A>Uq9tbbQQRA_
zmG>Yby_tgYkvl|^2Mp6#Ak#$8Yp~be{TCxw^hFaGy#B>86GntbvwSt>XY}eCLWD&N{5g
z_wB+QcGM2P3o9|q73Y#I9t
z^yyqpM(A249Q99pTXKF#;<&{@UaJ!?O`E{$0UyRw%8=g)^8N+*ye(b#x^jT
z%<{G^@Hl6x_(CPliD>fZP0P(?3c?(jWq>9|8{n0-Sr15K;u6P$PLEZ#u7W#E`Ujo-
zv=qnrb@9QS@R6~8ExO9q^POE`&Ox1#+FGqo9a%mJeCO
z6-_clCP`%*W%{}c18BRlAG+}*<_uinD_
zg@)THL>aH~e-G@4_=cbLXU+epd>?W~0NHsB`kxG;?3I(zUfUT<%&CH3IUIPkI>LdZsDhxJ-njK%^IQe+N*MX%vMx
zuUM2nVnyT68pT^fDS^O@$gWxA>*`*b+7Ae$X6J!vgo3q2o43c>SCXYn;%8|SA17Y=
zHD~e&UW}daL_S}5!lNULF(8IIlKoPwnN&5z4k&hU<;ndL4-H64N<#N{+^83f*VHV%
zDQfqoPKKk{B74maX>vLorZc=b)V*8cWVb}e2~rLl_bFwAPP2dO*>CD~ZuQ
zPOPc~nEmz+^Jmiv7=76ErZ~_iBY4s>bk%Q@{~e#^sfg9VoREgc48CLDpvjn5X6~aM;Ieo^tVVF?NmM;2zagFoOV|>GyAy
zT$sALEKU-cgTNV0EU{B+UjtlIcSWxK(U|mMBsM|pGunY4njUW+Ja|XMBzN;43MpZC
zu2TbnG_vc@WXu0))1eo4y@`Q3Sa9vKVTUVZED7H<*r2?;-a&Pp&K&7!EJzpDx$>U@
zm`yRx2h`B>c~nigVj)q9nW^c567DwS*KRAuT0mrL>MiqMGVB}3T;PSQpMCGZz}SOl
zbLqWEcSwo!F*;^+b7W}G+GNobW}9tR8WI%UYvIJv_}a;-pZ>5-Ix74V^PzN)#4ejD
z;qFYuzRX%qP!OpuK6HZSL(z#&V_u=fuL}T#{ojet?8m_&f88sRWL7IaaNSdet6rq%JTN~Am;3W%9|?O}#0F)Sr)T=qrdg3IiQf7_*l>@M-RAzsS11_UcZ&-lK>)ibHg}%&1)(cD
zc3S3t_$1HL*qz8UG$qlnK-9CO_A5`vIYN^F2LOXz{JZ&+tAL#gVuvs>@oaQ&QRbP%eRPtY~;5Scbu
z*0CpheNmi0wF8x1yqb1JE4mxQXpUq3m@9=)j*RfxZD~boOLs##Do@cmY3F;BU#l0R
zB>$Khc`{kyqTk`*3T%XmG>9;=>%hu;tx)^6c3(tCe)fGhuA|`k&eZCpuKIrWcll@e
zQ+M^4XImp9N`LM{(^F2a=_~8l^9~x|exym40EZLZN`YXY1)JPViRiddS)bp{p=Prc
zp8&k?xUYG=rxG1_np*MOz&HgKGbgbOccV_HC2jxU6Wbv?>mV1K>v50S$Pr+MJ}i8F
zzMn>Ty!?y@n+e>v>EI~NC>QK+%%Lt~yVxG?%m}7L3})=QHC-P*Cz13{LYO><<$Nzi
z5aR!Lafg5C_9-(Nm5q>!>H2;JB>SDhj!(MFQx+<-ZpdHc*-sXXsy%=Ac$){Zv>IRh
zaGPfkcGtKP>=xuy%YkD4r~$hI-E{i|X)OM14!wQL@l{z)f~L#b@3-A}Pq3WK)9d4D
z&@yKa26yUXbtI*iY6-C%l69WR#S&6H*@_cq3WY-Ax|f9Y&C1BeqVPn$I4d
z&IRo>S3bLncZID2NK&@Y@8lK!h604Ob8{pQLc)$62%r|c4|boD1dUL@NzR@Ma9y-aB
zFMlE|2b&}rADp3
zj(nSp;p%q7NXyIY{LTUSkP-qhJMPw%XPUSu8z4FteF*3X4+0S-g#*rIqAcZ4H8^(
zj7Viyg!xM_2Ao#=U~9fChygiSm#rS4iAFquC`=b90)`ybo&_(S>n6ru
z(|4`F>R2z&fy1$jNNBPE?6-KX?_$2EL+bJnNMcK1#dTh7X0}*`odKuA0Sm;ZZKL+5
z$%r&o8I9yg#KzgqGzyFPTT(1;6_g|R?}P7ozMESrk)*`o*;J#lcL6E7|4NPaKUX;B
z+=i3BR^OCx`;pNaf~|RzKq9v_>ZkHQJr0*&S_wfc6YbgDX2sHES~G@35c|Jcn_eFww%VUAy!keT_`}A|UQT;EW%o$o1$G4aLs=OlLP!LY
zDrjN4Rjr!{NOY3HYVNL&Du_3)_6t%@6#k8sgdKl7^c3cXB}r|k<-RkOTlo+CPh*g-
zP-<$VYFI?tbOmr^46X}Rh84ETFv
z%%EsoKnA_90WHj&>+|ErMgPf`1~|#m!TXC-(P7ixaK85|RiRp_!kmDL_Q~5F4>icR
zZz7(xW6wlC`mN>VD0JZT7O$)SKcGniWzORmPhb%7pP!hHf}>o?UeQt%H~UrTl{krA
zVtq+@_=!LPW>nlMy7>j4@fDG(xE*SnG+G!>hk)%XV<_K%?O3=f-t^I)jl`E}9v+@p
zy3paHw^!09lOS(zM@i$LoHcLW&BRd8(~=@&krL2`C;bILx83y*L!k~Tu$NOUg*hQl
zqhhwmxyGI2GOPCqKxd2r{sy~1Q-ukrvp{4~yXTfy`lLzb4rBh;^IO6_tUV2}h_J*w
z^`^u7=tzcuY{tN&jxifKs~ULx_5DARD08n@5Rq*~pv8@s-*7V52(3ae%ze
z=K&iP7s-PA>f{9EkJLW1Narcn5ug#}GeAOf`2%{334?s_`EtV+L#w2uR16H?FPg)N
zz)4fWjWa(nJ?Y!sBkYixdb`&5nF1YuC-yXBcS
zdLs*ACgkJKxi_|>fDbzwTjR$K)W>wKRwqFlVCqnFK3OK8mMc6VkyORSZv@{5*f4)F=~={PD0k75L7uiCGX(Po@e0Ezy8UDq7Coa7q6#J!=FJGE_>G#
z$10M|IdRt7?M-oC##i?u{|r~Q)C>dY?1W-Os-}((P$DJ%J6Z&R_U+5(teX;`f#;+X
z%Qc$u)K-CZX2IrrqM%8nq;!*m745$)chUfphKYpRom#4WwoAMkfU^*QBM|PR)8gu;
zfdko)nvdzmSc4=Lh8xf%a8h1q2$h=NpCbyq2tV8!QNwi+^lt>6XvHOGo=
zL`?wXQiBu4z}+;pPX*-ezNYne2xE0E{MS$1vhI-iXJaGhY|m>vTj_H_s{^I+Fb235
z@&JrnaaaV3hX!+Pc35|}-O4l(!?i>+NvGiA4p&-yRQ_cCYZ{Tv!QUsWAF-xxv{?_7
zq&(+OC?Dqr?W8}20lpGRTm^IBM`5^H^kT~a0~US&O8Ed%Gb5`a7<4$}yVlSG54oOq
z6s7~(Uld8^5H~w~rDz0_l1?wl<+t>tx;X@!By`?Gr=com*@6(3
zeN6yekU>bbiEiezI*w(9;DUXGAjbsoKDzuV%YXR?dA^^rZ7%aCZ)Yt7HjbH}=)iJJ
z7lux*_~Jv)P3)jsSTA7Foz8*Ww0;)dz15W+Q~6azpC3UvV-SVTBH#!R5v}j<{y$PN
zE3t!(BUzN@g*;mgP36u%KmX?Z%E)*|2rg%R_}eZM7qqkFKY11+?zS+Ih5#t&Dwii^
z0(3@a>%_Q9g%6O3-mOO{sm+b*uoqPKgwZu3G07epH~CuQ@@{jI(tew{kNp|~r;qDP
zP$_zr(MQDq{YKbN1A2C`>}91_+{u=mKA8Z`aU_wJmPKR*onJM-V@j1X4yT3HJyCcX
z{&DrX-9Af=^+nSw-L*N#fp1t869KUZ0FM0e$=D)&_aM=HojBT}RR8E^QN6*PYVe5{)sAfiB#{S^Ll#yczq!@Hl0>coS6n5?j9jaOj|3x1Dy
z`L0Ton3lCyGe#qoPc7)UuUZz_oB%Dz@#>%yL*Ru0_Y``7@mVY3KD|~idTwY
zWoPF*@I2I;-nd!Cq>#KCGxDsPMDQ_$qDR9T4}S`vZqtJZqd>Tjery&%kd*k;yu-!N
z(cyquA=o|tYl)YkdceQlOBgBGIzC&-wwEE>1$E#Q|C&$Pc6D#gmVlH$*){hTYD2>1b^l%aC
z&I9{5O84p^te#ctCCObNWzFtI%_o%_xW<-v9v|EcGjr)PjXU
zG?&K%R{CK#+pS)}cxudNHmvmTkZCdFJEokl(X-hy(f%^)%`Rd}vA{Q?V@>@nP2MRw%2Nc}Fb;{Tz=>vuv)f&kmT2h6W$V6c`ec$lN#e&bMvX=F?v
z9{`ZRD#mMsm*&5tN}BQxKXvH|dkG7Z<%V3RdxN&u;wevIFwN|5-zM;xLaj?dw7p{T
z!_5sIt$`XawpQJ*%END0gaM5V?u>I;L*FP3mWWDf3PmAMFtu2p
z@#01I0_v~{9$f1%+8;NVAsfqM<3gW0{w_E4hGl!;FciSCEqoKPmeRJY&O0Jz6dm)D
zq5ZDQZ6@98iL%u(h^JB5abJwb8A`$*pf{fe+!b$)W4os!M7i~U-!%(vRis4DK1$Ma
zI((4&F&zm19Jf}=>ySKpw7YhJ*#+hJ9fc!4hqhlo7;n40aU$hs>v;w$UGnf3>!-mP
zuE_&@TGKZJxnVLXF`0CBvsk16$byB%3^%PGWAN<$0Qgc|APZ|1+<&>NZfbd)EO^FE
zo^T#roLJp^4drGc{wS#^jfY)tJ38XLP=le`Mj(E^*Hu3dfK>rKx}DF?-g?egG4EK9
z2~IZwxwNju4vR?n9Xy#!|F)1i0`hyMw6^cJl6rURClB}%Tl$g0_pU>XLz6535Bu*y
z5W1;J2W>j*H>0C5!=tgo1HB?OH>QO2+>*pE=oMiiWV;Gc1QcuERo|hF&%dW&g&biWSp&eCOzqy+
z4b`&cynp%L^A%hqsZd<WRgcQ6uaH4?qq0vv4ovK~@m+Sb(p
zT1ckL3Q_0ClYl(rg!nq3@gjeB=HJxXGilzbM>J05Z>;Co)Lve
z!Emc7SL*W)zl=OLy#D~3}w3|NHyhCe{nuA>U|-HR+qm7Y%L8zzA#pP5N`o
zHC1jyv4fQFaP@uz&1=X8lLoJ96i6V-r;QB?sG)-@QW7!I+3Y;5GNd?wsHd%?2z
z6iJ1MS096*<-hos;-Osp{C(keiHBzxrsQ|EeYf%OywY6=alE6Zc7?vESO4CA@o0uN
zS&}2d-^oYOChU>-FMJAWN|sJ$d6(}k3BP`MKz)`2*Ar5jX}ywv40r&(CYup+(-$w=
zYP|-C=hG~Gw`b<&kmc;``O-TxrCU0hnnDpv^6?U8-#*En%d;H$yY4zrfTv}>jy{K4
z1>a5AgNXec7mfV%dPV>g5pb;RZUOd!W;MtOJDA_YmAC`5d`C3Qr~+aZ3)_0X%pmP^
z&=`2Oa|fHMIic;@0AoQ0HD0IGYyuuoO{bZ>t4!>dxVgo^cNA#1JIRB=9C=kht#^WG
zTn^q>c90vQA*Y|fo&>$k39&qAW$LJ^^?P18=h~q8U%zzc*+YZkj#1M3nWEgXSErL&
zvvmT(N6~hEPo0o84?dfN-ays_nI7VVT~K*H1Jl%UxwllM(|Az>u;|v{vY*RJM>`W1
zcr9R5ormbr1w2y{e*Q=MHA6pQP!u8@0a%zY*H3&qsUT!JFfFUF-Tki>O{X6pEfVtC
z9L&u-L2GI3e%>SP3PbMKF9gYiV$vJ2j_T;+^h!McT~A0BsfrQuh1zk4lcmN?5$1vr
z*`==E+iUosV~^?=RY!;a=n6XgBsEywl!bOdKS0xduBs}6k6Wr_$;o@U$^-GOe%yXB
zBRagLCy0LT1R<~UER(aH*$@UuGBO}U>o9pQW={^qZ`P;`WSaeAbl!*Np(iLcbhx>B
zUhke()bx$WfeAVDxUiKF2$Hd!VMur`B34P0P@xa9Mr{`_0ZcK}e0IzJPM2O(rU5;@
z#bg~hk8EI8iDPdX86hmFnc%g~EO`h;nc)4c0-_@uEr1cGf{DOKiB0O#l9B~t22$3h%)S|UlWt=&
zQv*Do(HH@XR&BCwQ3q2d^P-f83Bc3s9I!S4?v^OP@9SsTY#SgCp<{#W1OA5JOz0lo
zK4S~7c?#51ovYxP!-KM4bhj$bsHc3xc^GB6Ij<=kl4aGAkR_i7rR}QH{I-yB`HPRu
z=PkUPfO;=UF4xV|-D32sr()mZ72K|&AHy0nx4rsW1MvYJcL4p84D#mK)JHb+gQ&os
zQB14xSZo*Lp1zIeF{DuByil!$scDNIEcAg;qnGKH54DxI$H5zFix(e}ACUOgQQ-M8
zLtmz}cK%DJ;s4bjlhkWNe2G~B;xSwfk;m#LXuN#@kafQ>Lge?_iJ|oFq3RGizv740g{R-K<%x+g
z;AX3_YVsPFpJ&Uze4$}?Gw{s4NLu^nByR1pY)~e3zs#27CME1NGbjh6xM30j_N&jmQnhzcG_++Xxg`g~Wi5~p
z5OB5AGV)!^zSM>Q*Xu%9-~HG^Lv
zV(^Uq>6EnHPuiup*oQjgN#5cFf$dUi{x4`kFSob&eRes3m-BJbfyn8>=vo!4TF{gnJ;*exaoN+KGjRz?+arKJqhPh?mW8yX581>NcALLZ
z%$XHD*hDhGp!ecGcU+NN+*Kj)YaYniqS4suw^cFdUI}|RGa1l5a(U6o
zLaTtpSk)uAY`?#Ke70sscH17v)cg-mjd2NIw32CSgRyY4d3xN`GJ&yp}!L;d61Ea|c!
zS66L6ND286WelQ=YQ3e@?b+@Ozj*AR_{b@L_=j1?()SXWPTQ-K4Xx!c%u+2fpb8=S
zcq7=X;Z=g&^@Ry<7cN8-bUxb>xuL^Niu)+;p~qbS^;5Zk)#IqtiWj~(A}d5afYHYR
z6T~kSQET~Sjw3#uJVefFVc~37kY37r&cb4{mB^~*<*~`gV((Mm^xshto&KRQrK?X^
zfAUa)!RhEryTox0YvS;%&ui+6h(lV(xF6mItcu;q`_xeq5uW&z%4&^#HVv%0nfuCYjy=P{}b>
z!5v%*5ZN#eg8wQlYnu0B<1|1N)svK_rjvGbQIh4)stUAn8Rs#B%l{+1G=k-Jg%zaY
zkhl^4J9JB&5t{k}Gm@?TGFN5WHr33{lZ_^#KLw_s*eZCmzCMgK4?VLa5uF4Wl
zFp@Pe;$`K3y*+|N{&n$_f2Bjh2C*J4!5njv2ae~6wNjgziwhOhY)bG0k?$+A|9V_D
zWUl_`&;PaoU*QU{oZQ{n6mX%=#aq^5dFiMrsZ}N!73mc_NetX;^&b9n)K8Oc^`yI>
zoryDCuam5tWB;ZhFm2ZOVGu;=**bQElgRMjQCAWE7QG$d@S$Zc`PKdoXR)Fon!ciQ
z`{$cS)TiNtV93PaWOGZHoYcuNyd86H34oGABN(5u&l1B$ReevOXO*QGCF)QoO0`T{SwLu}dSRhwpnT1%5Or_G<@4-Yd`7dL5%vR#
z9-fazT!mH@0QB=`U|`@Kkb!PqKHZ+o!-6&68^(nTp^(uV!$v8VfWqB50g6g*{^md}
zE+``QhPeTH_x3Se?;q-eqKknP`L75`QyBJ^*+v{QB-P#oY+^$HXor0UHMc(a{ZV{U
z-jbbY)FI8<|0?gsxB*{U
zvHkF7GPD01gd?7wgVw{W!y5ZxY2Uv;@63V3ElJlGh@N;;gKz@9SpP)6#uf~w+(u~q
zQ98=M(?yo^06P>HCU6;~JMn`vYs&sC!!Rm1welStrN#1bwL^8fV@%8@QGw=fIX@=Z
zvxCYO58ObHDwEupQVf#=TrlJrVs1|d5PL84)Snq0b(gNsv^tZJ{pYw3*iMNh|9t4^
z>+Aat@+R(3Q9^K4iTr4B>%2P{U)QFQFoA`#?<{1C#T+_v;$@`3Y8*>(xg!3b5oG_9-VvdHzA4cFS&q$Z}RSE!7ft~LzXAQS^@7Hh@bt=7-
z0Oul61D}ux5+|$%l}~HHbQ#xV;&Ab^+akMjZS#po=sbrVTLe-X5L`N{0M6}>L&1AY
z{v&dBdOM!Wo5!Ja-B!luvRjJT@$3PB^DR4QRBUz>uWFgWZHGjFWK*E|Ipm}N(%Ebw
z5aasRdbm#ar!7C*v1)f9m6{d|bvp$_#-zWb{Hjl!#;Ff505RG-N-th0e>JE7&%v(D
z*3;Zzcrng-BxQ6G$YU61aAfl>MJT6q4T{KcvwH|G>|a4s^pHrb0*4#Mn6JfFz|F+)i0tX=5)X#mspyfES7Mk@qO?cGWJq}z1i
ze)kd3doCs|i28g*g;*IJxPB7q@2V*q+~7@pdVRV+u8-iV*<5+2SJ9rCE#P$z4N062
z*i3gUCr>*2;sSMyubx{Qe3wf@dq0|HHC>HdUxFXV5QNHpD#@R&+E(fN
zSoUNK1r^mPWzr<*$xf<4P(BOCB?1k=U|Z!lc&FJf<7vN-h(zw5Ki-e!J$NI#J$Hv=
zb|Hm^l9Us^EDb)jcHr=jU_uZGOSGDFJBTn?4XBpz4%x
z*|7?P#V|7}SU`HK?5nG@L7Wk(O`eH*z^w)W-&9soPQcht2qqKD-yG#bYT(8nc=cPi
zJlOwo~PQ>{fQ%T}Mx9%hof6f5#H_jf*@q&|alZ0p3^m%lp|hMam)yS#tY@;slQ>n96!S-|d+_+1r}
z6X!ApVT3!o-@dfmnXjz5$!OwA;$^PLv<%^H}0}LXhD>s+>R*;i;dz0j)
zgWtk&JhMP^WP8bj=|x&rm<=_S0QNT$SjNx+?ZT%VH}+mzWp#Dd=GB|Hk)u{OtJVOE
znJFW<=@{B%=3#^j>xY#kFA!k=x{OTGStXP-9Hga>J
z^ptQrV=-)z4LZA-=&K$Mom{!~^0Dpgt*+a7+#J2DLBn%!Nzjn}0Xu>cns9w43y6Zl
zG@G*!EzLGfEm64GzbBxAZI}Dgvz8)18b-Z`ZO5Il4uFk4L|`%GWgqqJnB01Nslt?O
z3c8-pxfP!21IS-i{sbMiANm08HU~lHLj36SWP_bf7%*fF>ZZZrczEx%P}kJajyVuBh@wP^
z_0nK_pkzXuNZ()Hj%&SR2%Td?Jry*g8YikKCLF8aiq_!_P4WgVuXS|^olGLRg8kmU
zU(;r1#TA0(m|FZKWe$%WnXD%H%bm5oy;GRF=s@~PFB!u>*=jxoUoELxgLP%VVvsyy
z$iyy?FnV=0?PsN=>3^TxYglUh4Z)n0&KeKRvFxlt^3&Kkin{$;+Yj*C(vE%q-ckt`
z5H76^6{7M;4A)!I7o^9_X2H78ym79KxJ_(-3<*&p-SBZF1DV;{ty7PUj$}=h1?=F0
z9^owQVxE7VZurdH`OjBunUf@0nALkfXpetq_bib
zdMo});&zUyPYblv#p?zAI22Tq**_IoaIlB}9i#FiC)|tt@Aq^;R##KKaeo`1Y@$SP
zGssHhmy2IjCxdxs<)XE9$UV9e{FO2A^JDNCc~Yu=(^w)dP!RMB53-;*k`H4W>jjgMx2C-TVt!MXGx5m;-84{lnO-vtt@GqeHY3`&Tfyj9!v4k}?e
z5M95VYJVo$@LJztHjYQq!d+p&cORV-w%yl9dZQL?74vF3lrYlua<1A=W#Y34e*O4z
zO+IHhBS^0;OAyrC*T4ETQQk5gNqV1+&}7f!Kgekk>-V*qy+Tv7XL;@%zoSE)+7|$i
z98vDhM)yyPY1E*H?}(-4ce5K;EW8XVEr)x0g2Uz;uJAs*?rn=i%#iC6OD0pEBiq%P
zGZC>ducW~>@lfY@XsMl@onMycNwH12X(Qk5e&P!Hq3+H5zVqIN@vUq*gG2Dc|6&56M0YH7u8mAGv3r;F@^#vbeAxx^=
zL%UZe$1DvyZQ5nP+&<@^Wh;Vyny|?Nt&^v&ixiSxoAK{)o$7Y`OTaM1+zG!$jYPFl
zt