diff --git a/data-recipe/README.md b/data-recipe/README.md index c7a40eb..7a4de50 100644 --- a/data-recipe/README.md +++ b/data-recipe/README.md @@ -91,6 +91,22 @@ Output format (question JSONL, one per line): {"config_index": 123, "question": "Problem statement ...", "style": "codeforces", "stage1_analysis": {...}, "stage2_output": {...}} ``` +#### Creative-Coding Feature Generation + +Creative-coding feature sets live in `question_generation/features_trees_data/creative`: +- `blender_bpy_5.jsonl` +- `unreal_engine_5_7.jsonl` +- `comfyui_n8n.jsonl` +- `duckdb_surrealdb.jsonl` + +Force the creative template with `--template-style creative` and point `--features-file` to a creative set: +```bash +python scripts/generate_questions.py --features-file features_trees_data/creative/blender_bpy_5.jsonl --start 0 --end 100 --output ../output/questions_blender.jsonl --batch-size 64 --template-style creative +python scripts/generate_questions.py --features-file features_trees_data/creative/unreal_engine_5_7.jsonl --start 0 --end 100 --output ../output/questions_unreal.jsonl --batch-size 64 --template-style creative +python scripts/generate_questions.py --features-file features_trees_data/creative/comfyui_n8n.jsonl --start 0 --end 100 --output ../output/questions_comfyui.jsonl --batch-size 64 --template-style creative +python scripts/generate_questions.py --features-file features_trees_data/creative/duckdb_surrealdb.jsonl --start 0 --end 100 --output ../output/questions_duckdb.jsonl --batch-size 64 --template-style creative +``` + ### 2) Answer Generation Start the SGLang server + run batch/concurrent examples: diff --git a/data-recipe/question_generation/features_trees_data/creative/blender_bpy_5.jsonl b/data-recipe/question_generation/features_trees_data/creative/blender_bpy_5.jsonl new file mode 100644 index 0000000..0443ff0 --- /dev/null +++ b/data-recipe/question_generation/features_trees_data/creative/blender_bpy_5.jsonl @@ -0,0 +1 @@ +{"features":{"blender bpy 5.x":{"scene and context":["bpy.context","bpy.data.scenes","view layers","depsgraph","active object","workspace","scene unit settings","context override","render layers","scene frame range"],"data blocks and collections":["object datablocks","mesh datablocks","material datablocks","image datablocks","collection instances","bpy.data.collections","library linking","library overrides","asset browser","custom ID properties"],"mesh modeling":["mesh.from_pydata","bmesh.new","bmesh.ops.extrude_face_region","bmesh.ops.subdivide_edges","bmesh.ops.bridge_loops","bmesh.ops.spin","edge crease","vertex groups","bevel modifier","loop cut"],"geometry nodes and modifiers":["Geometry Nodes","node groups","fields","attribute domain","modifier stack","array modifier","subdivision surface","boolean modifier","solidify modifier","curve modifier"],"materials and shading":["Shader Nodes","Principled BSDF","Material Output","Texture Coordinate","Image Texture","ColorRamp","Normal Map","UV Map node","Emission shader","Shader Editor"],"animation and rigging":["keyframes","F-curves","NLA tracks","drivers","armature bones","constraints","IK constraints","action strips","pose mode","shape keys"],"rendering and compositing":["Cycles","EEVEE Next","render settings","samples","denoise","Compositor nodes","view layers","AOV passes","film settings","output properties"],"io and assets":["glTF 2.0 export","USD export","FBX export","Alembic export","OBJ export","asset library","append/link","linked libraries","library overrides","file browser"],"addon scripting":["operators","panels","menus","handlers","timers","UI layout","property groups","addon registration","keymaps","bpy.app"]}},"mandatory_features":[],"idx":0,"leaf_count":90} diff --git a/data-recipe/question_generation/features_trees_data/creative/comfyui_n8n.jsonl b/data-recipe/question_generation/features_trees_data/creative/comfyui_n8n.jsonl new file mode 100644 index 0000000..c8ce650 --- /dev/null +++ b/data-recipe/question_generation/features_trees_data/creative/comfyui_n8n.jsonl @@ -0,0 +1 @@ +{"features":{"generative workflows":{"comfyui":{"core nodes":["CheckpointLoader","CLIPTextEncode","VAEEncode","KSampler","VAEDecode","EmptyLatentImage","LoadImage","SaveImage"],"conditioning":["ControlNet","LoRA","IP-Adapter","T2I Adapter","positive prompt","negative prompt","conditioning combine","prompt schedule"],"image pipeline":["img2img","inpaint","latent upscale","latent composite","mask processing","sampler scheduling","CFG scale","seed control"],"workflow assets":["workflow JSON","custom nodes","node groups","seed management","batch processing","model paths","queue execution","API websocket"],"model formats":["safetensors","diffusers","SDXL checkpoints","VAE models","LoRAs","ControlNet models","CLIP vision","UNet"]},"n8n":{"triggers":["Webhook","Cron","Schedule","Interval","Manual Trigger","Poll","Queue","Error Trigger"],"workflow nodes":["HTTP Request","Code","Set","Split In Batches","Merge","IF","Switch","Wait"],"integrations":["Slack","Discord","GitHub","S3","Postgres","OpenAI","Google Sheets","Notion"],"data handling":["JSON transform","binary data","credentials","environment variables","expressions","data pinning","item lists","file storage"],"execution":["queue mode","retry policies","error workflows","versioning","webhook responses","concurrency","workflow sharing","execution log"]}}},"mandatory_features":[],"idx":0,"leaf_count":80} diff --git a/data-recipe/question_generation/features_trees_data/creative/duckdb_surrealdb.jsonl b/data-recipe/question_generation/features_trees_data/creative/duckdb_surrealdb.jsonl new file mode 100644 index 0000000..485815d --- /dev/null +++ b/data-recipe/question_generation/features_trees_data/creative/duckdb_surrealdb.jsonl @@ -0,0 +1 @@ +{"features":{"data engines":{"duckdb":{"ingestion":["read_parquet","read_csv","read_json","httpfs","delta","iceberg","parquet_scan","csv_auto"],"sql analytics":["window functions","CTEs","pivot","approx_count_distinct","list/struct types","asof joins","unpivot","grouping sets"],"performance":["vectorized execution","memory limit","spilling to disk","profiling","pragma settings","parallel execution","optimizer","EXPLAIN"],"integrations":["Python API","sqlite_scanner","postgres_scanner","duckdb extensions","Arrow","Polars","Pandas","R"],"storage":["persistent database files","temporary tables","parquet export","COPY TO","metadata views","ATTACH","CREATE TABLE AS","VACUUM"]},"surrealdb":{"data model":["documents","graph relations","record links","schemafull","schemaless","table definitions","edge tables","record ids"],"surrealql":["SELECT","INSERT","UPDATE","RELATE","LIVE SELECT","LET","functions","FOR","GROUP BY"],"auth and security":["root/namespace/database auth","scopes","token expiration","permissions","roles","session variables","ACCESS","AUTHENTICATE"],"realtime":["live queries","change feeds","websocket protocol","live notifications","events","LIVE SELECT","live updates","watch"],"storage and deployment":["memory","RocksDB","SurrealDB Cloud","backup/restore","replication","distributed mode","surrealdb start","surrealdb import"],"api clients":["HTTP API","WebSocket API","Python SDK","JavaScript SDK","Rust SDK","CLI","gRPC","GraphQL"]}}},"mandatory_features":[],"idx":0,"leaf_count":89} diff --git a/data-recipe/question_generation/features_trees_data/creative/unreal_engine_5_7.jsonl b/data-recipe/question_generation/features_trees_data/creative/unreal_engine_5_7.jsonl new file mode 100644 index 0000000..b180dd4 --- /dev/null +++ b/data-recipe/question_generation/features_trees_data/creative/unreal_engine_5_7.jsonl @@ -0,0 +1 @@ +{"features":{"unreal engine 5.7":{"python editor scripting":["unreal.EditorAssetLibrary","unreal.AssetToolsHelpers","unreal.EditorUtilitySubsystem","unreal.LevelEditorSubsystem","unreal.EditorLevelLibrary","unreal.MoviePipelineQueueSubsystem","unreal.ControlRigBlueprint","unreal.BlueprintFactory","unreal.EditorFilterLibrary","unreal.ScopedEditorTransaction"],"c++ gameplay framework":["UCLASS","USTRUCT","UENUM","UPROPERTY","UFUNCTION","AActor","APawn","ACharacter","UActorComponent","UWorld","UGameInstance","UBlueprintFunctionLibrary"],"asset pipeline":["Asset Registry","PrimaryAssetId","Data Assets","Static Mesh import","Skeletal Mesh import","Material Instances","Nanite settings","LOD settings","Texture streaming","Derived Data Cache"],"world building":["World Partition","Level Instances","Landscape","Data Layers","Hierarchical Instanced Static Mesh","Procedural Content Generation (PCG)","One File Per Actor","Level Streaming Volumes","Runtime Virtual Textures","Foliage"],"rendering and fx":["Lumen","Nanite","Niagara","Virtual Shadow Maps","Post Process Volume","Temporal Super Resolution","Path Tracer","Deferred rendering","Material Editor","Substrate"],"sequencing and cinematic":["Level Sequence","Movie Render Queue","Cinematic Camera Actor","Control Rig","Sequencer tracks","Take Recorder","Camera cuts","Shot tracks"],"build system and plugins":["ModuleRules","Build.cs","Target.cs","Unreal Build Tool",".uplugin descriptors",".uproject settings","plugin loading phases","editor-only modules"],"runtime systems":["Enhanced Input","Gameplay Ability System","Replication","AI Behavior Tree","NavMesh","Chaos physics","Physics substepping","Mass Entity"]}},"mandatory_features":[],"idx":0,"leaf_count":76} diff --git a/data-recipe/question_generation/question_gen_template/__init__.py b/data-recipe/question_generation/question_gen_template/__init__.py index ab8605c..e74cccf 100644 --- a/data-recipe/question_generation/question_gen_template/__init__.py +++ b/data-recipe/question_generation/question_gen_template/__init__.py @@ -8,10 +8,12 @@ from .codeforces_question_gen import STAGE2_PROMPT_TEMPLATE as CODEFORCES_TEMPLATE from .leetcode_question_gen import STAGE2_PROMPT_TEMPLATE as LEETCODE_TEMPLATE from .atcoder_question_gen import STAGE2_PROMPT_TEMPLATE as ATCODER_TEMPLATE +from .creative_coding_question_gen import STAGE2_PROMPT_TEMPLATE as CREATIVE_CODING_TEMPLATE __all__ = [ 'STAGE1_PROMPT_TEMPLATE', 'CODEFORCES_TEMPLATE', 'LEETCODE_TEMPLATE', - 'ATCODER_TEMPLATE' -] \ No newline at end of file + 'ATCODER_TEMPLATE', + 'CREATIVE_CODING_TEMPLATE' +] diff --git a/data-recipe/question_generation/question_gen_template/creative_coding_question_gen.py b/data-recipe/question_generation/question_gen_template/creative_coding_question_gen.py new file mode 100644 index 0000000..ef3db53 --- /dev/null +++ b/data-recipe/question_generation/question_gen_template/creative_coding_question_gen.py @@ -0,0 +1,51 @@ +""" +Stage 2 Prompt Template for Creative Coding Question Generation +""" + +REQUIRED_TOOL_CONTEXTS = [ + "Blender", + "Unreal", + "ComfyUI", + "n8n", + "DuckDB", + "SurrealDB", +] + +STAGE2_PROMPT_TEMPLATE = f"""You are an expert creative coding challenge designer. + +You have been provided with: +- selected_features_tree: a tree structure in which each leaf contains a 'feature' name and its 'potential_use'. +- integration_strategy: a strategy describing how these features should be integrated into a single, high-quality creative coding task. + +Your task is to generate a complete creative coding problem statement that integrates **all** selected features. + +Requirements: +- Write in a concise, neutral, and precise style for creative coding tasks. +- Define a single, well-scoped creative output to produce (visual, audio, procedural, data-driven, or automation), described in plain language. +- If selected_features_tree includes any of the following tools or contexts (case-insensitive): {", ".join(REQUIRED_TOOL_CONTEXTS)}, you MUST explicitly mention each present tool/context in the task description and make it essential to the task. +- Do **not** use any algorithm names, data structure names, implementation hints, or solution strategies. Avoid words like "DFS", "BFS", "dynamic programming", "recursion", "greedy", or similar anywhere. +- Provide clear Input and Output sections with plain-language descriptions. +- Always include at least two distinct examples using "Example 1:" and "Example 2:" with "Input:" and "Output:" lines. +- Include a Constraints section listing parameter bounds or limits, each on its own line. +- Do not include any commentary, hints, or explanations beyond the required sections. +- Output a **single JSON object** with the field "question" only. + +**Output Format (strict):** +{{ + "question": "