Skip to content

accelerated_scan.warp compilation gives error. #16

@roman8ivanov

Description

@roman8ivanov

Hi,
I tried using scan from accelerated_scan.warp. I observed that it requires compilation every time I import the function.
However, I consistently receive an error related to "error C2872: 'std': ambiguous symbol" (see below).

System params:
Python 3.13.9
Cuda 12.8
accelerated-scan 0.3.1
pytorch 2.9.1
triton 3.5.1
Windows 11

Error:

tmpxft_00006d14_00000000-7_cuda.compute_90.cudafe1.cpp
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1134): error C2872: 'std': ambiguous symbol
C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.41.34120\include\valarray(20): note: could be 'std'
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1134): note: or 'std'
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1134): note: the template instantiation context (the oldest one first) is
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1181): note: see reference to class template instantiation 'torch::dynamo::autograd::IValuePacker<__int64>' being compiled
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1108): note: while compiling class template member function 'c10::TypePtr torch::dynamo::autograd::IValuePacker<__int64>::packed_type(void)'
C:/ProgramData/anaconda3/Lib/site-packages/torch/include\torch/csrc/dynamo/compiled_autograd.h(1181): note: see the first reference to 'torch::dynamo::autograd::IValuePacker<__int64>::packed_type' in 'torch::dynamo::autograd::IValuePacker::packed_type'
ninja: build stopped: subcommand failed.

CalledProcessError Traceback (most recent call last)
File C:\ProgramData\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py:2597, in _run_ninja_build(build_directory, verbose, error_prefix)
2596 stdout_fileno = 1
-> 2597 subprocess.run(
2598 command,
2599 shell=IS_WINDOWS and IS_HIP_EXTENSION,
2600 stdout=stdout_fileno if verbose else subprocess.PIPE,
2601 stderr=subprocess.STDOUT,
2602 cwd=build_directory,
2603 check=True,
2604 env=env)
2605 except subprocess.CalledProcessError as e:
2606 # Python 2 and 3 compatible way of getting the error object.

File C:\ProgramData\anaconda3\Lib\subprocess.py:577, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
576 if check and retcode:
--> 577 raise CalledProcessError(retcode, process.args,
578 output=stdout, stderr=stderr)
579 return CompletedProcess(process.args, retcode, stdout, stderr)

CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 2.

The above exception was the direct cause of the following exception:

RuntimeError Traceback (most recent call last)
Cell In[26], line 1
----> 1 from accelerated_scan.warp import scan

File C:\ProgramData\anaconda3\Lib\site-packages\accelerated_scan\warp.py:13
6 cuda_source = (Path(__file__).parent / 'warp.cuh').read_text()
8 cpp_source = """
9 at::Tensor warpscan_forward(const at::Tensor &gates, const at::Tensor &tokens, const at::Tensor &out, const bool reverse);
10 void warpscan_backward(const at::Tensor &gates, const at::Tensor &output, const at::Tensor &outGrad, const at::Tensor& gateGradOut, const at::Tensor& valueGradOut);
11 """
---> 13 module = load_inline(
14 name='warpscan',
15 cpp_sources=[cpp_source],
16 cuda_sources=[cuda_source],
17 functions=['warpscan_forward', 'warpscan_backward'],
18 verbose=True,
19 extra_cflags=['-DNOMINMAX'], #I added this
20 extra_cuda_cflags=[
21 "-O3",
22 "-std=c++17",
23 "-DNOMINMAX", #I added this
24 "--ptxas-options=-v",
25 "-lineinfo",
26 "--fmad", "false",
27 "-U__CUDA_NO_HALF_OPERATORS__", "-U__CUDA_NO_HALF_CONVERSIONS__",
28 "-U__CUDA_NO_BFLOAT16_OPERATORS__", "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
29 ]
30 )
31 warpscan_forward = module.warpscan_forward
32 warpscan_backward = module.warpscan_backward

File C:\ProgramData\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py:2051, in load_inline(name, cpp_sources, cuda_sources, sycl_sources, functions, extra_cflags, extra_cuda_cflags, extra_sycl_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, with_sycl, is_python_module, with_pytorch_error_handling, keep_intermediates, use_pch, no_implicit_headers)
2047 _maybe_write(sycl_source_path, "\n".join(sycl_sources))
2049 sources.append(sycl_source_path)
-> 2051 return _jit_compile(
2052 name,
2053 sources,
2054 extra_cflags,
2055 extra_cuda_cflags,
2056 extra_sycl_cflags,
2057 extra_ldflags,
2058 extra_include_paths,
2059 build_directory,
2060 verbose,
2061 with_cuda,
2062 with_sycl,
2063 is_python_module,
2064 is_standalone=False,
2065 keep_intermediates=keep_intermediates)

File C:\ProgramData\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py:2134, in _jit_compile(name, sources, extra_cflags, extra_cuda_cflags, extra_sycl_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, with_sycl, is_python_module, is_standalone, keep_intermediates)
2130 hipified_sources.add(hipify_result[s_abs].hipified_path if s_abs in hipify_result else s_abs)
2132 sources = list(hipified_sources)
-> 2134 _write_ninja_file_and_build_library(
2135 name=name,
2136 sources=sources,
2137 extra_cflags=extra_cflags or [],
2138 extra_cuda_cflags=extra_cuda_cflags or [],
2139 extra_sycl_cflags=extra_sycl_cflags or [],
2140 extra_ldflags=extra_ldflags or [],
2141 extra_include_paths=extra_include_paths or [],
2142 build_directory=build_directory,
2143 verbose=verbose,
2144 with_cuda=with_cuda,
2145 with_sycl=with_sycl,
2146 is_standalone=is_standalone)
2147 elif verbose:
2148 logger.debug('No modifications detected for re-loaded extension module %s, skipping build step...', name)

File C:\ProgramData\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py:2286, in _write_ninja_file_and_build_library(name, sources, extra_cflags, extra_cuda_cflags, extra_sycl_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, with_sycl, is_standalone)
2284 if verbose:
2285 logger.info('Building extension module %s...', name)
-> 2286 _run_ninja_build(
2287 build_directory,
2288 verbose,
2289 error_prefix=f"Error building extension '{name}'")

File C:\ProgramData\anaconda3\Lib\site-packages\torch\utils\cpp_extension.py:2614, in _run_ninja_build(build_directory, verbose, error_prefix)
2612 if hasattr(error, 'output') and error.output: # type: ignore[union-attr]
2613 message += f": {error.output.decode(*SUBPROCESS_DECODE_ARGS)}" # type: ignore[union-attr]
-> 2614 raise RuntimeError(message) from e

RuntimeError: Error building extension 'warpscan'

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions