diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index a627663a93bc..4dbcf1a5a388 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -144,6 +144,7 @@ list(
     src/server/handlers/handler_base.yaml
     src/server/handlers/http_handler_base.yaml
     src/server/handlers/http_handler_static.yaml
+    src/server/handlers/http_handler_static_stream.yaml
     src/server/handlers/implicit_options.yaml
     src/server/handlers/ping.yaml
     src/server/handlers/server_monitor.yaml
diff --git a/core/functional_tests/CMakeLists.txt b/core/functional_tests/CMakeLists.txt
index 0ca51c4fcd17..b50b7960b523 100644
--- a/core/functional_tests/CMakeLists.txt
+++ b/core/functional_tests/CMakeLists.txt
@@ -32,6 +32,9 @@ add_dependencies(${PROJECT_NAME} ${PROJECT_NAME}-metrics)
 add_subdirectory(static_service)
 add_dependencies(${PROJECT_NAME} ${PROJECT_NAME}-static-service)
 
+add_subdirectory(static_streaming_service)
+add_dependencies(${PROJECT_NAME} ${PROJECT_NAME}-static-streaming-service)
+
 add_subdirectory(slow_start)
 add_dependencies(${PROJECT_NAME} ${PROJECT_NAME}-slow-start)
 
diff --git a/core/functional_tests/static_streaming_service/CMakeLists.txt b/core/functional_tests/static_streaming_service/CMakeLists.txt
new file mode 100644
index 000000000000..0bbd2fea1b2f
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/CMakeLists.txt
@@ -0,0 +1,7 @@
+project(userver-core-tests-static-streaming-service CXX)
+
+add_executable(${PROJECT_NAME} "main.cpp")
+target_link_libraries(${PROJECT_NAME} userver::core)
+
+userver_testsuite_add_simple(WORKING_DIRECTORY tests-serve-from-root)
+userver_testsuite_add_simple(WORKING_DIRECTORY tests-serve-from-subpath)
diff --git a/core/functional_tests/static_streaming_service/main.cpp b/core/functional_tests/static_streaming_service/main.cpp
new file mode 100644
index 000000000000..34d4c0455504
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/main.cpp
@@ -0,0 +1,12 @@
+#include
+
+#include
+#include
+#include
+
+int main(int argc, char* argv[]) {
+    const auto component_list =
+        components::MinimalServerComponentList()
+            .Append();
+    return utils::DaemonMain(argc, argv, component_list);
+}
diff --git a/core/functional_tests/static_streaming_service/public/404.html b/core/functional_tests/static_streaming_service/public/404.html
new file mode 100644
index 000000000000..e5b08cb62db1
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/public/404.html
@@ -0,0 +1,8 @@
+
+
+    userver
+
+
+    File not found
+
+
diff --git a/core/functional_tests/static_streaming_service/public/dir1/.hidden_file.txt b/core/functional_tests/static_streaming_service/public/dir1/.hidden_file.txt
new file mode 100644
index 000000000000..6344582d3756
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/public/dir1/.hidden_file.txt
@@ -0,0 +1 @@
+Hidden file
diff --git a/core/functional_tests/static_streaming_service/public/dir1/dir2/data.html b/core/functional_tests/static_streaming_service/public/dir1/dir2/data.html
new file mode 100644
index 000000000000..f15e1f80fbd3
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/public/dir1/dir2/data.html
@@ -0,0 +1 @@
+file in recurse dir
diff --git a/core/functional_tests/static_streaming_service/public/dir1/dir2/index.html b/core/functional_tests/static_streaming_service/public/dir1/dir2/index.html
new file mode 100644
index 000000000000..1a2d720f0550
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/public/dir1/dir2/index.html
@@ -0,0 +1 @@
+index.html file in subdirs
diff --git a/core/functional_tests/static_streaming_service/public/index.html b/core/functional_tests/static_streaming_service/public/index.html
new file mode 100644
index 000000000000..0dabc21ca9ed
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/public/index.html
@@ -0,0 +1,8 @@
+
+
+    userver
+
+
+    Welcome to userver
+
+
diff --git a/core/functional_tests/static_streaming_service/static_config.yaml b/core/functional_tests/static_streaming_service/static_config.yaml
new file mode 100644
index 000000000000..68cf6c3144c4
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/static_config.yaml
@@ -0,0 +1,31 @@
+components_manager:
+    task_processors:  # Task processor is an executor for coroutine tasks
+
+        main-task-processor:  # Make a task processor for CPU-bound coroutine tasks.
+            worker_threads: 4  # Process tasks in 4 threads.
+
+        fs-task-processor:  # Make a separate task processor for filesystem bound tasks.
+            worker_threads: 4
+
+    default_task_processor: main-task-processor  # Task processor in which components start.
+
+    components:  # Configuring components that were registered via component_list
+        server:
+            listener:  # configuring the main listening socket...
+                port: 8080  # ...to listen on this port and...
+                task_processor: main-task-processor  # ...process incoming requests on this task processor.
+        logging:
+            fs-task-processor: fs-task-processor
+            loggers:
+                default:
+                    file_path: '@stderr'
+                    level: debug
+                    overflow_behavior: discard  # Drop logs if the system is too busy to write them down.
+
+        handler-static-stream:  # Finally! Static streaming handler.
+            dir: /var/www
+            path: /*  # Match every path; the part matched by '*' selects the file to serve.
+            method: GET  # Handle only GET requests.
+            response-body-stream: true
+            task_processor: main-task-processor  # Run it on CPU bound task processor
+            fs-task-processor: fs-task-processor  # Run file IO on blocking task processor
diff --git a/core/functional_tests/static_streaming_service/tests-serve-from-root/conftest.py b/core/functional_tests/static_streaming_service/tests-serve-from-root/conftest.py
new file mode 100644
index 000000000000..a81cc4b0360a
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/tests-serve-from-root/conftest.py
@@ -0,0 +1,22 @@
+# [Static service sample - config hook]
+import pathlib
+
+import pytest
+
+pytest_plugins = ['pytest_userver.plugins.core']
+
+USERVER_CONFIG_HOOKS = ['static_config_hook']
+
+
+@pytest.fixture(scope='session')
+def static_config_hook(service_source_dir):
+    """Config hook that makes `handler-static-stream` serve the `public` directory of this service."""
+    def _patch_config(config_yaml, config_vars):
+        components = config_yaml['components_manager']['components']
+        if 'handler-static-stream' in components:
+            components['handler-static-stream']['dir'] = str(
+                pathlib.Path(service_source_dir).joinpath('public'),
+            )
+
+    return _patch_config
+    # [Static service sample - config hook]
diff --git a/core/functional_tests/static_streaming_service/tests-serve-from-root/test_static.py b/core/functional_tests/static_streaming_service/tests-serve-from-root/test_static.py
new file mode 100644
index 000000000000..ed2e45eab0ec
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/tests-serve-from-root/test_static.py
@@ -0,0 +1,53 @@
+import pytest
+
+
+async def test_file_not_found(service_client):
+    """A missing file yields 404 with a 'File not found' body."""
+    response = await service_client.get('/file.not')
+    assert response.status == 404
+    assert b'File not found' in response.content
+
+
+@pytest.mark.parametrize('path', ['/index.html', '/'])
+async def test_file(service_client, service_source_dir, path):
+    """The root index file is served both by name and for the bare root path."""
+    response = await service_client.get(path)
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    file = service_source_dir.joinpath('public') / 'index.html'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_file_recursive(service_client, service_source_dir):
+    """A file inside nested directories is served."""
+    response = await service_client.get('/dir1/dir2/data.html')
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    assert response.content == b'file in recurse dir\n'
+    file = service_source_dir.joinpath('public') / 'dir1' / 'dir2' / 'data.html'
+    assert response.content.decode() == file.read_text()
+
+
+@pytest.mark.parametrize('path', ['/dir1/dir2', '/dir1/dir2/'])
+async def test_file_recursive_index(service_client, service_source_dir, path):
+    """A directory request returns that directory's index.html, with or without a trailing slash."""
+    response = await service_client.get(path)
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    file = service_source_dir.joinpath('public') / 'dir1' / 'dir2' / 'index.html'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_hidden_file(service_client, service_source_dir):
+    """Dot-files are served like any other file."""
+    response = await service_client.get('/dir1/.hidden_file.txt')
+    assert response.status == 200
+    file = service_source_dir.joinpath('public') / 'dir1' / '.hidden_file.txt'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_invalid_path(service_client):
+    """A path that tries to climb above the served directory yields 404."""
+    response = await service_client.get('/../../../../../../../../../../../../../etc/passwd')
+    assert response.status == 404
+    assert b'File not found' in response.content
diff --git a/core/functional_tests/static_streaming_service/tests-serve-from-subpath/conftest.py b/core/functional_tests/static_streaming_service/tests-serve-from-subpath/conftest.py
new file mode 100644
index 000000000000..dbc5a5e597aa
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/tests-serve-from-subpath/conftest.py
@@ -0,0 +1,21 @@
+import pathlib
+
+import pytest
+
+pytest_plugins = ['pytest_userver.plugins.core']
+
+USERVER_CONFIG_HOOKS = ['static_config_hook']
+
+
+@pytest.fixture(scope='session')
+def static_config_hook(service_source_dir):
+    """Config hook that serves the `public` directory under the `/possible/to/work/from/subpath/` prefix."""
+    def _patch_config(config_yaml, config_vars):
+        components = config_yaml['components_manager']['components']
+        if 'handler-static-stream' in components:
+            components['handler-static-stream']['dir'] = str(
+                pathlib.Path(service_source_dir).joinpath('public'),
+            )
+            components['handler-static-stream']['path'] = '/possible/to/work/from/subpath/*'
+
+    return _patch_config
diff --git a/core/functional_tests/static_streaming_service/tests-serve-from-subpath/test_static.py b/core/functional_tests/static_streaming_service/tests-serve-from-subpath/test_static.py
new file mode 100644
index 000000000000..ac9644782bbe
--- /dev/null
+++ b/core/functional_tests/static_streaming_service/tests-serve-from-subpath/test_static.py
@@ -0,0 +1,64 @@
+import pytest
+import yarl
+
+
+async def test_file_not_found(service_client):
+    """A missing file under the subpath yields 404 with a 'File not found' body."""
+    response = await service_client.get('/possible/to/work/from/subpath/file.not')
+    assert response.status == 404
+    assert b'File not found' in response.content
+
+
+@pytest.mark.parametrize(
+    'path',
+    [
+        '/possible/to/work/from/subpath/index.html',
+    ],
+)
+async def test_file(service_client, service_source_dir, path):
+    """The root index file is served when requested by name under the subpath."""
+    response = await service_client.get(path)
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    file = service_source_dir.joinpath('public') / 'index.html'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_file_recursive(service_client, service_source_dir):
+    """A file inside nested directories is served under the subpath."""
+    response = await service_client.get('/possible/to/work/from/subpath/dir1/dir2/data.html')
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    assert response.content == b'file in recurse dir\n'
+    file = service_source_dir.joinpath('public') / 'dir1' / 'dir2' / 'data.html'
+    assert response.content.decode() == file.read_text()
+
+
+@pytest.mark.parametrize(
+    'path',
+    ['/possible/to/work/from/subpath/dir1/dir2', '/possible/to/work/from/subpath/dir1/dir2/'],
+)
+async def test_file_recursive_index(service_client, service_source_dir, path):
+    """A directory request returns that directory's index.html, with or without a trailing slash."""
+    response = await service_client.get(path)
+    assert response.status == 200
+    assert response.headers['Content-Type'] == 'text/html'
+    file = service_source_dir.joinpath('public') / 'dir1' / 'dir2' / 'index.html'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_hidden_file(service_client, service_source_dir):
+    """Dot-files are served like any other file."""
+    response = await service_client.get('/possible/to/work/from/subpath/dir1/.hidden_file.txt')
+    assert response.status == 200
+    file = service_source_dir.joinpath('public') / 'dir1' / '.hidden_file.txt'
+    assert response.content.decode() == file.read_text()
+
+
+async def test_invalid_path(service_client, service_source_dir):
+    """A path that tries to climb above the served directory yields 404."""
+    response = await service_client.get(
+        '/possible/to/work/from/subpath/dir1/../../../../../../../../../../../../../etc/passwd')
+    assert response.status == 404
+    # assert response.headers['Content-Type'] == 'text/html'
+    # assert b'File not found' in response.content  #TODO: test issue, unable to pass 'encoded' path to client.get
diff --git a/core/include/userver/server/handlers/http_handler_static_stream.hpp b/core/include/userver/server/handlers/http_handler_static_stream.hpp
new file mode 100644
index 000000000000..1ab0f053edab
--- /dev/null
+++ b/core/include/userver/server/handlers/http_handler_static_stream.hpp
@@ -0,0 +1,73 @@
+#pragma once
+
+/// @file userver/server/handlers/http_handler_static_stream.hpp
+/// @brief @copybrief server::handlers::HttpHandlerStaticStream
+
+#include
+#include
+
+USERVER_NAMESPACE_BEGIN
+
+namespace server::handlers {
+
+// clang-format off
+
+/// @ingroup userver_components userver_http_handlers
+///
+/// @brief Streaming handler that returns HTTP 200 if the file exists and returns the file data with the mapped content type.
+///
+/// Path arguments of this handler are appended to the `dir` property to get the file.
+///
+/// @code{.yaml}
+/// handler-static-stream:
+///     dir: /var/www   # Path to the directory with files
+/// @endcode
+///
+/// The `handler-static-stream` with `path: /files/*` on request to `/files/some/file.html`
+/// would return file at path `/var/www/some/file.html`.
+///
+/// ## HttpHandlerStaticStream Dynamic config
+/// * @ref USERVER_FILES_CONTENT_TYPE_MAP
+///
+/// \ref userver_http_handlers "Userver HTTP Handlers".
+///
+/// ## Static options:
+/// Inherits all the options from server::handlers::HttpHandlerBase and adds the
+/// following ones:
+///
+/// Name               | Description                                                                               | Default value
+/// ------------------ | ----------------------------------------------------------------------------------------- | -------------
+/// dir                | Base directory path                                                                       | /var/www
+/// directory-file     | File to return for directory requests. File name (not path) search in requested directory | "index.html"
+/// not-found-file     | File to return for missing files                                                          | "/404.html"
+/// buffer-size        | Single read buffer size in bytes                                                          | 8192
+/// fs-task-processor  | Task processor to do filesystem operations                                                | engine::current_task::GetBlockingTaskProcessor()
+
+// clang-format on
+
+class HttpHandlerStaticStream final : public HttpHandlerBase {
+public:
+    /// @ingroup userver_component_names
+    /// @brief The default name of server::handlers::HttpHandlerStaticStream
+    static constexpr std::string_view kName = "handler-static-stream";
+
+    using HttpHandlerBase::HttpHandlerBase;
+
+    HttpHandlerStaticStream(const components::ComponentConfig& config, const components::ComponentContext& context);
+
+    static yaml_config::Schema GetStaticConfigSchema();
+
+    void HandleStreamRequest(http::HttpRequest&, request::RequestContext&, http::ResponseBodyStream&) const override;
+
+private:
+    dynamic_config::Source config_;             // dynamic config source, read for USERVER_FILES_CONTENT_TYPE_MAP
+    const std::string base_dir_;                // `dir` option with trailing '/' characters removed
+    const std::size_t buffer_size_;             // `buffer-size` option
+    const std::string directory_file_;          // `directory-file` option
+    const std::string not_found_file_;          // `not-found-file` option
+    engine::TaskProcessor& fs_task_processor_;  // task processor on which files are opened and read
+};
+
+}  // namespace server::handlers
+
+USERVER_NAMESPACE_END
diff --git
a/core/src/server/handlers/http_handler_static_stream.cpp b/core/src/server/handlers/http_handler_static_stream.cpp
new file mode 100644
index 000000000000..59f645cb1253
--- /dev/null
+++ b/core/src/server/handlers/http_handler_static_stream.cpp
@@ -0,0 +1,215 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#ifndef ARCADIA_ROOT
+#include "generated/src/server/handlers/http_handler_static_stream.yaml.hpp"  // Y_IGNORE
+#endif
+
+USERVER_NAMESPACE_BEGIN
+
+namespace server::handlers {
+
+namespace {
+
+// Queue that carries file chunks from the reading task to the task writing the response.
+using Queue = concurrent::StringStreamQueue;
+
+// Read buffer size used when `buffer-size` is absent from the static config.
+constexpr std::size_t kDefaultReadBufferSize = 8192;
+
+// Returns `dir` without its trailing '/' characters ("/var/www/" -> "/var/www", "/" -> "").
+std::string GetNormalizeDirectory(std::string_view dir) {
+    const auto last = dir.find_last_not_of('/');
+    return std::string{dir.substr(0, last == std::string_view::npos ? 0 : last + 1)};
+}
+
+// Appends `relative` to `base`, inserting a '/' unless `relative` already starts with one,
+// so that rooted values such as "/404.html" do not produce a doubled separator.
+std::string JoinPath(std::string_view base, std::string_view relative) {
+    std::string result{base};
+    if (relative.empty() || relative.front() != '/') {
+        result += '/';
+    }
+    result += relative;
+    return result;
+}
+
+// Forwards chunks from the queue to the response stream until `Pop` returns false
+// or the current task is cancelled.
+void DoSendChunks(http::ResponseBodyStream& stream, Queue::Consumer consumer) {
+    std::string data;
+    while (!engine::current_task::ShouldCancel() && consumer.Pop(data)) {
+        stream.PushBodyChunk(std::move(data), {});
+    }
+}
+
+// Reads `file` in chunks of at most `buffer_size` bytes and pushes every chunk into the queue.
+// Stops when a read returns no data, when `Push` returns false or when the task is cancelled.
+void DoReadFile(fs::blocking::CFile& file, Queue::Producer producer, std::size_t buffer_size) {
+    std::string buf;
+
+    while (!engine::current_task::ShouldCancel()) {
+        buf.resize(buffer_size);
+        const auto read_bytes = file.Read(buf.data(), buf.size());
+        if (!read_bytes) {
+            // A zero-length read without the EOF flag is treated as a read failure.
+            if (!std::feof(file.GetNative())) {
+                LOG_INFO() << "Failed to read data";
+            }
+            return;
+        }
+        if (read_bytes != buf.size()) {
+            buf.resize(read_bytes);
+        }
+        if (!producer.Push(std::move(buf))) {
+            return;
+        }
+    }
+}
+
+// Opens `path` for binary reading. Returns a default-constructed (not open) CFile when `path`
+// is a directory, cannot be inspected or cannot be opened.
+fs::blocking::CFile SafeOpen(std::string_view path) {
+    // std::fopen() requires a null-terminated string, which std::string_view does not guarantee.
+    const std::string path_str{path};
+    boost::system::error_code ec;
+    if (const auto is_directory = boost::filesystem::is_directory(path_str, ec); ec || is_directory) {
+        return {};
+    }
+    auto* const fptr = std::fopen(path_str.c_str(), "rb");
+    if (!fptr) {
+        return {};
+    }
+    return fs::blocking::CFile{fptr};
+}
+
+// Runs SafeOpen() on `task_processor` and waits for its result.
+fs::blocking::CFile AsyncSafeOpen(engine::TaskProcessor& task_processor, std::string_view path) {
+    return engine::AsyncNoSpan(task_processor, &SafeOpen, std::cref(path)).Get();
+}
+
+// Accepts only paths without "..", so a request cannot climb out of the base directory.
+// File names that merely contain two consecutive dots are rejected as well.
+bool IsValidPath(std::string_view path) { return path.find("..") == std::string_view::npos; }
+
+}  // namespace
+
+// Reads the handler options from the static config.
+// Throws ClientError if the handler is not configured with `response-body-stream: true`.
+HttpHandlerStaticStream::HttpHandlerStaticStream(
+    const components::ComponentConfig& config,
+    const components::ComponentContext& context
+)
+    : HttpHandlerBase(config, context),
+      config_(context.FindComponent<components::DynamicConfig>().GetSource()),
+      // "/var/www" is the default promised by the handler documentation and the config schema.
+      base_dir_(GetNormalizeDirectory(config["dir"].As<std::string>("/var/www"))),
+      buffer_size_(config["buffer-size"].As<std::size_t>(kDefaultReadBufferSize)),
+      directory_file_(config["directory-file"].As<std::string>("index.html")),
+      not_found_file_(config["not-found-file"].As<std::string>("/404.html")),
+      fs_task_processor_(GetFsTaskProcessor(config, context)) {
+    if (!HttpHandlerBase::IsStreamed()) {
+        throw ClientError(
+            HandlerErrorCode::kInvalidUsage,
+            InternalMessage{fmt::format("response-body-stream must be true for {}", HttpHandlerBase::HandlerName())}
+        );
+    }
+}
+
+// Streams the requested file. Falls back to the directory index file, then to the not-found
+// file (status 404), and finally to a plain-text 404 body when nothing could be opened.
+void HttpHandlerStaticStream::HandleStreamRequest(
+    http::HttpRequest& request,
+    request::RequestContext& /*context*/,
+    http::ResponseBodyStream& stream
+) const {
+    // Rebuild the path relative to the base directory from the handler path arguments.
+    std::string search_path;
+    search_path.reserve(request.GetRequestPath().size());
+
+    for (std::size_t i = 0; i < request.PathArgCount(); ++i) {
+        auto& arg = request.GetPathArg(i);
+        search_path += "/";
+        search_path += arg;
+    }
+
+    auto& response = request.GetHttpResponse();
+
+    const auto is_valid_path = IsValidPath(search_path);
+
+    LOG_DEBUG() << "search_path: " << search_path;
+
+    auto full_path = JoinPath(base_dir_, search_path);
+
+    auto file = is_valid_path ? AsyncSafeOpen(fs_task_processor_, full_path) : fs::blocking::CFile{};
+    if (file.IsOpen()) {
+        response.SetStatusOk();
+    }
+    // The path itself could not be opened as a file: try the directory index file.
+    if (is_valid_path && !file.IsOpen() && !directory_file_.empty()) {
+        if (directory_file_.front() == '/') {
+            search_path = directory_file_;
+        } else if (search_path.empty() || search_path.back() != '/') {
+            search_path += "/" + directory_file_;
+        } else {
+            search_path += directory_file_;
+        }
+        full_path = JoinPath(base_dir_, search_path);
+        LOG_DEBUG() << "search_path 2: " << search_path;
+        file = AsyncSafeOpen(fs_task_processor_, full_path);
+    }
+    // Still nothing to serve: answer 404 with the configured not-found file.
+    if (!file.IsOpen()) {
+        full_path = JoinPath(base_dir_, not_found_file_);
+        file = AsyncSafeOpen(fs_task_processor_, full_path);
+        response.SetStatusNotFound();
+    }
+
+    // Even the not-found file could not be opened: send a minimal plain-text body.
+    if (!file.IsOpen()) {
+        response.SetStatusNotFound();
+        stream.SetEndOfHeaders();
+        stream.PushBodyChunk("File not found\n", {});
+        return;
+    }
+    const auto config = config_.GetSnapshot();
+
+    // The content type is looked up by the extension of the file that is actually served.
+    const auto extension = boost::filesystem::path(full_path).extension().string();
+    const auto content_type = config[::dynamic_config::USERVER_FILES_CONTENT_TYPE_MAP][extension];
+    LOG_DEBUG() << "extension: " << extension << ", " << "content_type: " << content_type;
+    response.SetContentType(content_type);
+    stream.SetEndOfHeaders();
+
+    // The file is read on the filesystem task processor while this task sends the chunks.
+    auto queue = Queue::Create();
+    auto read_task =
+        utils::Async(fs_task_processor_, "read", DoReadFile, std::ref(file), queue->GetProducer(), buffer_size_);
+
+    DoSendChunks(stream, queue->GetConsumer());
+}
+
+// Merges the schema resource of this handler into the HttpHandlerBase schema.
+yaml_config::Schema HttpHandlerStaticStream::GetStaticConfigSchema() {
+    return yaml_config::MergeSchemasFromResource<
+        HttpHandlerBase>("src/server/handlers/http_handler_static_stream.yaml");
+}
+
+}  // namespace server::handlers
+
+USERVER_NAMESPACE_END
diff --git a/core/src/server/handlers/http_handler_static_stream.yaml b/core/src/server/handlers/http_handler_static_stream.yaml
new file mode 100644
index 000000000000..53d5448e7bba
--- /dev/null
+++ 
b/core/src/server/handlers/http_handler_static_stream.yaml
@@ -0,0 +1,26 @@
+type: object
+description: |
+    Handler that returns HTTP 200 if file exist
+    and returns file data with mapped content/type
+additionalProperties: false
+properties:
+    dir:
+        type: string
+        description: directory to serve files from
+        defaultDescription: /var/www
+    buffer-size:
+        type: integer
+        description: read operation buffer size in bytes
+        defaultDescription: 8192
+    fs-task-processor:
+        type: string
+        description: task processor to do filesystem operations
+        defaultDescription: engine::current_task::GetBlockingTaskProcessor()
+    directory-file:
+        type: string
+        description: File to return for directory requests. File name (not path) search in requested directory
+        defaultDescription: index.html
+    not-found-file:
+        type: string
+        description: File to return for missing files
+        defaultDescription: /404.html