From 03e17cba6705499cb1ea962814d114196481cf4a Mon Sep 17 00:00:00 2001 From: Anner Visser Date: Tue, 16 May 2023 10:21:48 +0200 Subject: [PATCH 1/3] Bump GitHub actions in workflow --- .github/workflows/build.yml | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0832122..bfd9495 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,28 +10,23 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'ScientaNL/html-pdf-export' steps: - - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.6.0 - with: - access_token: ${{ secrets.GITHUB_TOKEN }} - - name: Set Github environment variables - uses: ScientaNL/github-actions-env-toolkit@1.0.0 + uses: ScientaNL/github-actions-env-toolkit@1.1.0 - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: DockerHub Login - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_PUBLIC_USERNAME }} password: ${{ secrets.DOCKERHUB_PUBLIC_TOKEN }} - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Cache Docker layers - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /tmp/.buildx-cache key: ${{ runner.os }}-buildx-${{ github.sha }} @@ -39,7 +34,7 @@ jobs: ${{ runner.os }}-buildx- - name: Build & push Docker image - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: push: true tags: scienta/html-pdf-export:${{ github.event_name == 'push' && 'branch-' || '' }}${{env.GITHUB_REF_NAME_SLUG}} From d038d9c2712580208e2eb69e0c007922729e897c Mon Sep 17 00:00:00 2001 From: Luuk van Houdt <71889608+luukvhoudt@users.noreply.github.com> Date: Fri, 8 Mar 2024 17:12:32 +0100 Subject: [PATCH 2/3] Refactored service to bun Added structured logging Added graceful shutdown --- .dockerignore | 16 ++++--- .editorconfig | 43 +++++++++++++++++++ .gitignore | 5 +++ Dockerfile | 43 +++++++++++++++++-- bun.lockb | Bin 0 -> 11933 bytes bunfig.toml | 1 + examples/html2pdf.php | 19 -------- examples/html2pdf.py | 8 ---- examples/html2pdf.sh | 2 +- index.js | 88 -------------------------------------- package.json | 15 +++++++ src/html-to-pdf-client.ts | 24 +++++++++++ src/index.ts | 6 +++ src/logger.ts | 7 +++ src/server.test.ts | 63 +++++++++++++++++++++++++++ src/server.ts | 70 ++++++++++++++++++++++++++++++ src/shutdown.ts | 50 ++++++++++++++++++++++ tsconfig.json | 27 ++++++++++++ 18 files changed, 361 insertions(+), 126 deletions(-) create mode 100644 .editorconfig create mode 100755 bun.lockb create mode 100644 bunfig.toml delete mode 100644 examples/html2pdf.php delete mode 100644 examples/html2pdf.py mode change 100644 => 100755 examples/html2pdf.sh delete mode 100644 index.js create mode 100644 package.json create mode 100644 src/html-to-pdf-client.ts create mode 100644 src/index.ts create mode 100644 src/logger.ts create mode 100644 src/server.test.ts create mode 100644 src/server.ts create mode 100644 src/shutdown.ts create mode 100644 tsconfig.json diff --git a/.dockerignore b/.dockerignore index 40fa7c8..4af68bf 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,10 @@ -# do not copy/add files below to docker image on-build -.git -.gitignore -.dockerignore -Dockerfile -/examples +node_modules +examples +.git +.github +.gitignore +.dockerignore +.editorconfig +Dockerfile* +README.md +LICENSE diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..30e5eb9 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,43 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = tab +insert_final_newline = true +tab_width = 4 +trim_trailing_whitespace = true + +# Overwrite specific file types +[*.{yml,yaml}] +indent_size = 2 +indent_style = space + +[*.php] +ij_php_spaces_around_pipe_in_union_type = true + +[*.ts] +ij_typescript_chained_call_dot_on_new_line = true + +# Overwrite for poeditor +[resources/language/*.json] +indent_style = space +indent_size = 4 + +# Overwrite specifics from vendors +[Component/**.php] +indent_size = 4 +indent_style = tab + +# Ignore vendor path +[vendor/**] +root = unset +charset = none +end_of_line = none +indent_style = none +insert_final_newline = none +tab_width = none +trim_trailing_whitespace = none diff --git a/.gitignore b/.gitignore index b8ade52..9d0873c 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,11 @@ typings/ # Yarn Integrity file .yarn-integrity +# IDE files +.idea + # dotenv environment variables file .env +# generated files +*.pdf diff --git a/Dockerfile b/Dockerfile index 2b70681..de33f92 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,41 @@ -FROM surnet/alpine-node-wkhtmltopdf:8.11.3-0.12.5-full-font +FROM oven/bun:1-alpine as base +WORKDIR /usr/src/app -COPY index.js . +FROM base AS install +# Install all dependencies +RUN mkdir -p /temp/all +COPY package.json bun.lockb /temp/all/ +RUN cd /temp/all && bun install --frozen-lockfile +# Install prod dependencies +RUN mkdir -p /temp/prod +COPY package.json bun.lockb /temp/prod/ +RUN cd /temp/prod && bun install --frozen-lockfile --production -EXPOSE 8000 -CMD ["node", "index.js"] +FROM base AS prerelease +COPY --from=install /temp/all/node_modules node_modules +COPY . . + +FROM surnet/alpine-wkhtmltopdf:3.19.0-0.12.6-small as wkhtmltopdf + +FROM base AS release +RUN apk add --no-cache \ + libstdc++ \ + libx11 \ + libxrender \ + libxext \ + libssl3 \ + ca-certificates \ + fontconfig \ + freetype \ + ttf-dejavu \ + ttf-droid \ + ttf-freefont \ + ttf-liberation +COPY --from=wkhtmltopdf /bin /usr/local/bin +COPY --from=install /temp/prod/node_modules node_modules +COPY --from=prerelease /usr/src/app/src . +COPY --from=prerelease /usr/src/app/package.json . + +USER bun +EXPOSE 8000/tcp +CMD ["bun", "run", "index.ts"] diff --git a/bun.lockb b/bun.lockb new file mode 100755 index 0000000000000000000000000000000000000000..0b54baa1aea149d64008cd1719463416e6fadabf GIT binary patch literal 11933 zcmeHN2Ut^Cu)aW0L_jR8U4d0xOA-hW>AF@>EC`5*g#;1^gancx#g+vWJ1C2yU_n%{ z?1}{&iUmbMEGQN%tK#mWV!?t1U3fD|4#Wny?|rX)&-uP&?!D*y|CuwV-#b5=FvF!1 zCRZS0@3pK)s{0>F2E~0xijs#4L}I+DgP%U!mx9&< zT@6|n^c25wp9lH}&^tl50i6oE1L!!=$R`aI3S=ybO4CxNThl1Y0G`K#ZVftA zBKG4+r4(-lPb8C46gQmD=Se6^z~k`-T3b<}+7#6a^7V%NhM+B4P*hvckX4}rK_icd zBN7YzDar)y(auFJmAEpMJ`Wo6jR#j?eZV$FKJofYv+Ijmk6mrV>gYaleCs5ep;^5+bH~{y+LL}ZJIc?G^+MlO_8alsa@i3TCU;c zpL&M$@@jWN>+vR|F7NkUNXUFudNJ?u^l?3VnVz`1xTrMJubUE*cNyRv01w@>ev~wY#dLx%QkCCay9qbJ z<8Yp!R)4gkIYRJL;KBp&=sOgmm3LqNC-`i*7yt>+r4p z{|5kK|LFi&96z1m*Bl|`PXYYTD*tr>An{{U90b1=@VNdU4%?4BZwim;m>+*=H|)I> zeGm3X3PFu+0(&KeG)H6EWDQ4+Hg$p@@({m#Nh7W+{7}?2;<_oGHm)%rwmb6Rh7|9O zRoVnJ3ekA||Fhn;ZEDdm|HYy~DS6eYX&Z~pHoE!;)%j2Z3vcwPJo3EscxJTup_#Ww z{C07nq4&~qErYNzT`bqmxVEA|_+tLD@kRW3p0RIV2g+~#l^o;}2sRR4_5MtYcy4i{ z_u6YZ+|7GyUQXb<#*WG#5o$ZwJN4-NW#c?}`c-7Y#TE+g#9e#ssdY#SMBae4U_h{ouJAV896?@*1q;AL8YLDvd zQ8_}GcE6yYh2Gj@uUd4{sWn-@BWKLS8@3;=?h}oG^Gm(G*tdw#3J=%V#{Rl2&ahRR z^1C?>%Qrn}b4)WQwpQ0ED8kp^&X%f>YuTr`5t;`VmKD`#e&8{Ktem!<2z~x*%#hY0 z@4CRyuIE*s7ifu>GYmGEJWZ2L^>cn7*Y*5@<+ZXuyJ;^yc>DJ}Nzehuxu;j=wr%0s z+mVvmEh=eM^K|~8F;>>}9Ni$cAXDO)qsFT~U(;st^R;*j$GKYU`fF;rkJg}s{We!> zdL(uH`8V%44||t!^OwC`uIryJ{IU1GTxb1vo#we!+U7q=C{Ir@pXzvZ=v6gdm{#kB zmLty@I?1=NYkbUPqkrZ-zNsH|NO~o{Xnk%?%$T?CoULo}_rI9`a8a;u{^ruGm$}#P z1f7vgs=Zg8@^iS}o|2Y$MkM_KeL{q`X6vg&58lO5{pL$vb=GG_r;j`n8(#F2I?;bJKiO8+g_&amcx2!H$8qbVqOZ{m|;IwR~LH)=$m>5#fb1V!hBpBQH1| zcgpKjw=Z(Y*$<|y?!|W;@6|d+70WulV~$vE?wggGbRg||p9LMxP>bW(3!l0;hV0}H zvrX+8?BYJ=`~o#z_4S0dvT$10xvzs?ID3ztUTxqzzNCMP9Xh!_9(}FeguC9DoA_dM zo8aIn-LkR@m*iA?vhD5(tRsh}>~dQX7;vKh`Dux2yf|kQp}C#8)$6ZK>-P9}%s4y! z+@nyb-A|E5Kb09(@*RRxQ}liQDJ)6!qFS17iMbb7gBaF;b9*E0KG3G{uiv8#AdFZYaot2T^HsQf+igKnf6FT4w= z7uxS@FJ+7^3M<>vch}BeGQxf~4SupK*~Qy-bV03AN4SY5(C2GU-B!i(5`QrxO zi+y;Yf*X>2;C;-rAQAUn%v3dATn~xR!b;Z7eG@!sGE3&@l3o1NsLuMbOWCS>N$<_l zr2La%6;X3b{gRf{PA>6q?Rvzi{l?dW?$7ZZm+P{m?J{$di`rE{MEpzESLCLx)^jR* zXDHNsIW$LRu~cVk*)^+*q`kHMFS*aYkWB?st70} z^zc%%dCNz3Em7ko>$8H_$R=jft&kID-rYv{gzY^OmU43V1kLrgw$&z{T5dbLqROJ! zF(9xbec(QW%A#`1x2>Jo_PcHric`2i84H>|hW-VZ!m zJyM_7$vy$8Y1FpKT(^oBf3)-56W^MdT2a0z=2c`~uGWj!&we|QXqIX}Ys2Lgwpu$G z8H-1inwjrSVxNnAQFWoq;l=#|?@wMg2{?#4b#g4B_?PCTf_p>V>Wvs7bDPDQA z>!j|NmaSgbePrgzg42;Ml3H&yUR*nf&`Qc({Ia|T1XLd^shV}|NsIK_*dr(1ibbN` zOU3+WuLpc+d$I=*5nkNS5uyFoWpCv12c6S+N7Bc-UCL=!H&FYwbF4$jJ8pzYcHtAP z6*9d^%Y3#w*UZ~<)8^L{c6EoEv=64mJ}&cUhrH5!)#pjFFGOxy`^`V_??)E-TAUj^ z(c_ocZfnbCGR{2Dx71&s)^dE%yi~IYp9JxB`B1xwtlX5V+&#PRz8P8J?IiaP7-Yzt zknRi|#9rJ}6QKqCWo_G~u557jsfW%1Z5{qdOQ08u7smYK_SjITq(e%;V%vQVK?(aO zP0g>mJ;&vzm_x8Uxym0RBblI7s~=5HV(ykwt>+_XIZc5bI{tX$Erq-W66KOfxi zeEjgPAmo@uNDZc?fZ9}VUBZA?&+2LGxc*3&20P<`-*hg5Z!HVVM!Z%PLy|;7-;$A zlzs7_d(|^!+XrUtGWmOa^4N2|m`CDLyrZZDDso9Htg9z=t z`0hn7Vd*g5ppC+yVTSsVW#LJBHHWqaalCsCNiTP8Z#ui&z*#rv@ZMJUr~XMh7Z4}8 zvLpO2ON)|_ZxE_B=a1Th{E5i$JMv(MNxxPR9zu*CEOY*M018frU8^M2v-DF=* z^uH-n{fzT(@ZT}tJ@DNF-#zf%1OFo)2vmKno~6>d4pNC9Lm-mMI6@(Vd?{ymNO(La zW0twGR1nP*^9LAP8FPe!0Fjta&hoW&O2Jy-{wGUCXl#s6;2p9=y% z`_VSE7tg-9Cna@3yRi;vH`l&Va_mb__9yFbn9peIWnG9*03KT-@-MO7fq(D2#NsIxcSOLZoS|3u-Y7(6QDdr3omU9i+~jL8~A`kijc?&1;!G~A+aPRwgOVj85YnG<|OKb zL}5UR1$YNABo2kdWoT;5u~vGNM4pgH3>5}EN@7z;tcD^5dICg3z%ardLZUSwMb&#r zJPL`=(A2>0#?ap-}v5FtI(pHJsShKeOJSyU)b>L(F|$_6VgnGiG;D3ie(R3=lx z3lK5KG$csM5K97>cnW3G6^gE$2pF<}XpT%ABJiV2g*;v;pCgsgB|LwQpG+)* zLIg6pkjDw8^I=s%G@Zi*PDrK0zetKUC>~2?{&c9Z0G66)ojeb>M!=7fibY@-S1b;Z1wv*hK?W+M zkqUm44{G|+J)Om{f>{Y_+z5lZQ9z)6OIOS_P2d}B26V#>-I8Gg9lr6h)f)oP>bG>o z?A!$Yi%vu{zT{kmH!PcA{Bpwq<(CuaIOAD0!TCww0J2Y>fUjTEXt<0riislM=- z&W0I{)f~RhJ2#>HJOx?=+XPw^yC&(ljdxGwN&uY73vd@KUQHPG2|Iv4c>*jrW#d5R z!f>SfiA6GrSSW;Hs2E7o{ds<3f1ad~PXBR6wC^J%R27>9N>KD9<=WI39rib6F63&s zqobisjX+gz03_)bs#UBBM=2LZfGIDa7paVH#L{{rl^4)cV2#CcxEv{uZAA}~HUd_y zgHWRC3jEY`_3D`cSAPviSlF7V;HS+C1fM=?7&;0k&}AG+08d7j1_~ORH9yNsoT!e6 zwu5$P!o<(k3}E`~8O%KBDDZM4&Q)88fJ5yLs)(};wh^qk%6D<_O0^LxWgcKrUMMQU zZld^xHello world from SHELL' > test.pdf +curl http://localhost:8000 -H 'Content-Type: text/html' -d '

Hello world from SHELL

' > "$(dirname $0)/test.pdf" diff --git a/index.js b/index.js deleted file mode 100644 index 93d0fc9..0000000 --- a/index.js +++ /dev/null @@ -1,88 +0,0 @@ -const http = require('http'); -const urlParser = require('url').parse; -const spawn = require('child_process').spawn; -const tempDir = require('os').tmpdir(); -const fileSystem = require('fs'); - -const server = http.createServer((request, response) => { - - const requestPath = urlParser(request.url).pathname; - generatePdf(request, response); - -}).listen(80); - -server.on('error', function (e) { - console.log(e); -}); - -const generatePdf = (request, response) => { - const requestBody = []; - const clientId = (Math.random() * 0x100000000 + 1).toString(36); - - console.info({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'request', - 'message': 'connected', - }); - - request.on('data', (chunk) => { - requestBody.push(chunk); - }); - - request.on('end', () => { - const tempFile = tempDir + '/' + clientId + '.pdf'; - const wkhtmltopdf = spawn('wkhtmltopdf', [ - '--quiet', - '--print-media-type', - '--no-outline', - '-', - tempFile, - ]); - - wkhtmltopdf.stdin.end( - Buffer.concat(requestBody).toString() - ); - - wkhtmltopdf.on('exit', (code) => { - console.info({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'wkhtmltopdf', - 'message': 'exitted with ' + code, - }); - - if (code !== 0) { - response.writeHead(500); - response.end(); - return; - } - - response.writeHead(200); - fileSystem.createReadStream(tempFile).pipe(response).on('end', () => { - fileSystem.unlinkSync(tempFile); - }); - }); - - wkhtmltopdf.stderr.on('data', (chunk) => { - console.warn({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'wkhtmltopdf', - 'message': chunk.toString(), - }); - }); - }); - - request.on('error', (error) => { - console.warn({ - 'timestamp': (new Date).toISOString(), - 'client': clientId, - 'module': 'request', - 'message': error, - }); - - response.writeHead(400); - response.end(); - }); -}; diff --git a/package.json b/package.json new file mode 100644 index 0000000..fdd7d6d --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "name": "html-pdf-export", + "module": "src/index.ts", + "type": "module", + "dependencies": { + "nanoid": "^5.0.6", + "pino": "^8.19.0" + }, + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5.0.0" + } +} diff --git a/src/html-to-pdf-client.ts b/src/html-to-pdf-client.ts new file mode 100644 index 0000000..db2231d --- /dev/null +++ b/src/html-to-pdf-client.ts @@ -0,0 +1,24 @@ +import { spawn, which } from 'bun'; + +export type HtmlToPdfClient = (req: Request, outputPath: string) => Promise; +export const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { + const bin = which('wkhtmltopdf'); + if (bin === null) { + throw new Error('Missing HTML to PDF binary'); + } + const proc = spawn( + ['wkhtmltopdf', '--quiet', '--print-media-type', '--no-outline', '-', outputPath], + {stdin: req, stderr: 'pipe'}, + ); + + const exitCode = await proc.exited; + const errors: string = await Bun.readableStreamToText(proc.stderr); + if (errors) { + throw new Error(errors); + } + + // if no errors but unsuccessful exit code, throw a generic error + if (exitCode !== 0) { + throw new Error(`Failed to convert HTML to PDF, the process exited with code ${exitCode}`); + } +}; diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..16c4b64 --- /dev/null +++ b/src/index.ts @@ -0,0 +1,6 @@ +import { createServer } from './server.ts'; +import { trapShutdown } from './shutdown.ts'; + +const server = createServer(); + +trapShutdown(async () => server.stop()); diff --git a/src/logger.ts b/src/logger.ts new file mode 100644 index 0000000..5e31a8d --- /dev/null +++ b/src/logger.ts @@ -0,0 +1,7 @@ +import pino, { type Logger as PinoLogger } from 'pino'; + +export const loggerUsingPino = () => pino({ + name: 'html-pdf-export', +}); + +export type Logger = () => PinoLogger; diff --git a/src/server.test.ts b/src/server.test.ts new file mode 100644 index 0000000..160759c --- /dev/null +++ b/src/server.test.ts @@ -0,0 +1,63 @@ +import { write } from 'bun'; +import { afterEach, beforeEach, expect, mock, test } from 'bun:test'; +import pino, { type Logger, type LoggerExtras } from 'pino'; +import type { HtmlToPdfClient } from './html-to-pdf-client.ts'; +import { createServer } from './server.ts'; + +mock.module('nanoid', () => ({nanoid: () => 'fake-random-id'})); + +const port = 0; // 0 means give a random unassigned port +const host = 'http://localhost'; +const method = 'POST'; +const body = "

Hello world

"; +const headers = {'content-type': 'text/html'}; +const logger = { + info: mock() as pino.LogFn, + error: mock() as pino.LogFn, + child: mock() as LoggerExtras['child'], +} as Logger; +const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { + const html = await req.text(); + await write(outputPath, html); +}; + +let server: ReturnType; +beforeEach(() => server = createServer({port, htmlToPdfClient, logger: () => logger})); +afterEach(() => server.stop()); + +test('logs request id', async () => { + await server.fetch(new Request(host)); + expect(logger.child).toHaveBeenCalledWith({requestId: 'fake-random-id'}); +}); + +const invalidRequestMethods = ['GET', 'HEAD', 'PUT', 'DELETE', 'CONNECT', 'OPTIONS', 'TRACE', 'PATCH']; +test.each(invalidRequestMethods)('cannot do %s requests', async method => { + const res = await server.fetch(new Request(host, {method})); + expect(res.status).toBe(405); + expect(logger.error).toHaveBeenCalledWith('Invalid request method'); +}); + +test('requires a request body', async () => { + const res = await server.fetch(new Request(host, {method})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Missing request body'); +}); + +test('requires a content-type request header', async () => { + const res = await server.fetch(new Request(host, {method, body})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Missing content-type request header'); +}); + +test('requires a request with text/html content-type header', async () => { + const res = await server.fetch(new Request(host, {method, body, headers: {'content-type': ''}})); + expect(res.status).toBe(400); + expect(logger.error).toHaveBeenCalledWith('Invalid content-type request header'); +}); + +test('success', async () => { + const res = await server.fetch(new Request(host, {method, body, headers})); + expect(res.status).toBe(200); + expect(await res.text()).toBe(body); + expect(res.headers.get('content-type')).toBe('application/pdf'); +}); diff --git a/src/server.ts b/src/server.ts new file mode 100644 index 0000000..59d5452 --- /dev/null +++ b/src/server.ts @@ -0,0 +1,70 @@ +import { file } from 'bun'; +import { nanoid } from 'nanoid'; +import { mkdir, unlink } from 'node:fs/promises'; +import { tmpdir } from 'os'; +import { htmlToPdfClient, type HtmlToPdfClient } from './html-to-pdf-client.ts'; +import { type Logger, loggerUsingPino } from './logger.ts'; + +export interface CreateServerOptions { + port?: number; + logger?: Logger; + htmlToPdfClient?: HtmlToPdfClient; +} + +export const createServer = (options?: CreateServerOptions) => { + const port = options?.port ?? 8000; + const logger = options?.logger?.() ?? loggerUsingPino(); + const client = options?.htmlToPdfClient ?? htmlToPdfClient; + + logger.info(`Listening on port ${port}...`); + + return Bun.serve({ + port, + async fetch(req) { + const requestId = nanoid(); + logger.child({requestId}); + + if (req.method !== 'POST') { + logger.error('Invalid request method'); + return new Response(null, {status: 405}); + } + + if (!req.body) { + logger.error('Missing request body'); + return new Response(null, {status: 400}); + } + + if (!req.headers.has('content-type')) { + logger.error('Missing content-type request header'); + return new Response(null, {status: 400}); + } + + if (req.headers.get('content-type') !== 'text/html') { + logger.error('Invalid content-type request header'); + return new Response(null, {status: 400}); + } + + const tmpDir = process.env.HTML_PDF_EXPORT_TMPDIR ?? tmpdir(); + if (!(await file(tmpDir).exists())) { + logger.info('Temporary file directory not found, creating a new directory'); + await mkdir(tmpDir, {recursive: true}); + } + + const outputPath = `${tmpDir}/${requestId}.pdf`; + const contentLength = req.headers.get('content-length'); + logger.info('Starting conversion of HTML to PDF', {contentLength}); + const startTime = process.hrtime(); + await client(req, outputPath); + const duration = process.hrtime(startTime); + logger.info('Done converting HTML to PDF', {contentLength, duration}); + + const output = file(outputPath); + output.stream().getReader().closed.then(() => unlink(outputPath)); + return new Response(output, {status: 200, headers: {'content-type': 'application/pdf'}}); + }, + error(err) { + logger.error(err); + return new Response(null, {status: 500}); + }, + }); +}; diff --git a/src/shutdown.ts b/src/shutdown.ts new file mode 100644 index 0000000..d401506 --- /dev/null +++ b/src/shutdown.ts @@ -0,0 +1,50 @@ +import { sleep } from 'bun'; +import { type Logger as PinoLogger } from 'pino'; +import { type Logger, loggerUsingPino } from './logger.ts'; + +class ShutdownTimedOutError extends Error { +} + +interface ShutdownOptions { + timeout?: number; + logger?: Logger; +} + +export function trapShutdown(callback: () => Promise, options?: ShutdownOptions) { + process.once("SIGINT", () => handleShutdown(callback, options)); + process.once("SIGTERM", () => handleShutdown(callback, options)); +} + +async function handleShutdown(callback: () => Promise, options?: ShutdownOptions) { + const envTimeout = process.env.HTML_PDF_EXPORT_TIMEOUT ? parseInt(process.env.HTML_PDF_EXPORT_TIMEOUT) : null; + const timeout = options?.timeout ?? envTimeout ?? 10_000; + const logger = options?.logger?.() ?? loggerUsingPino(); + const handleForceExit = forceExit(logger); + + process.on("SIGTERM", handleForceExit); + process.on("SIGINT", handleForceExit); + + try { + await Promise.race([ + sleep(timeout).then(() => { + throw new ShutdownTimedOutError(); + }), + callback(), + ]); + process.exit(0); + } catch (e) { + if (e instanceof ShutdownTimedOutError) { + logger.warn("Shutdown handler timed out, quitting forcefully"); + } else { + logger.error(e, "Error during shutdown handling"); + } + process.exit(1); + } +} + +function forceExit(logger: PinoLogger) { + return (signal: number): void => { + logger.error(`Received second signal ${signal}, exiting NOW`); + process.exit(1); + }; +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..0fef23a --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + // Enable latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "ESNext", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} From d67957b13daa3dceb750e056671782935a8f9be2 Mon Sep 17 00:00:00 2001 From: Hikariii Date: Thu, 18 Apr 2024 12:05:52 +0200 Subject: [PATCH 3/3] Stream requestbody to file and convert from file fixes [sc-35362] --- src/bun.lockb | 0 src/html-to-pdf-client.ts | 39 +++++++++++++++++++--- src/index.ts | 2 +- src/logger.ts | 5 +-- src/package.json | 0 src/server.ts | 68 ++++++++++++++++++++++++++++----------- src/shutdown.ts | 5 ++- 7 files changed, 89 insertions(+), 30 deletions(-) create mode 100755 src/bun.lockb create mode 100755 src/package.json diff --git a/src/bun.lockb b/src/bun.lockb new file mode 100755 index 0000000..e69de29 diff --git a/src/html-to-pdf-client.ts b/src/html-to-pdf-client.ts index db2231d..2127c0a 100644 --- a/src/html-to-pdf-client.ts +++ b/src/html-to-pdf-client.ts @@ -1,17 +1,39 @@ -import { spawn, which } from 'bun'; +import { spawn, which, file } from 'bun'; +import { PinoLogger } from './logger.ts'; -export type HtmlToPdfClient = (req: Request, outputPath: string) => Promise; -export const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { +export type HtmlToPdfClient = (inputPath: string, outputPath: string, logger: PinoLogger, timeout?: number) => Promise; +export const htmlToPdfClient: HtmlToPdfClient = async (inputPath, outputPath, logger, timeout = 10_000) => { const bin = which('wkhtmltopdf'); if (bin === null) { throw new Error('Missing HTML to PDF binary'); } + + const inputFile = Bun.file(inputPath); + if (!await inputFile.exists()) { + throw new Error(`Html for conversion ${inputPath} does not exist or is not readable`) + } + + const htmlSize = inputFile.size; + if (htmlSize < 1) { + throw new Error(`Html file-size for conversion ${inputPath} is smaller than 1 byte`) + } + + logger.info(`Starting conversion of HTML to PDF from ${inputPath}, size: ${htmlSize}`); + const proc = spawn( - ['wkhtmltopdf', '--quiet', '--print-media-type', '--no-outline', '-', outputPath], - {stdin: req, stderr: 'pipe'}, + ['wkhtmltopdf', '--log-level', 'warn', '--print-media-type', '--disable-javascript', '--no-outline', inputPath, outputPath], + {stderr: 'pipe'}, ); + const timer = setTimeout(() => { + proc.kill(129); + throw new Error(`Timing out after ${timeout} ms while calling wkhtmltopdf, killing the process manually`); + }, timeout); + const exitCode = await proc.exited; + + clearTimeout(timer); + const errors: string = await Bun.readableStreamToText(proc.stderr); if (errors) { throw new Error(errors); @@ -21,4 +43,11 @@ export const htmlToPdfClient: HtmlToPdfClient = async (req, outputPath) => { if (exitCode !== 0) { throw new Error(`Failed to convert HTML to PDF, the process exited with code ${exitCode}`); } + + const outputFile = file(outputPath); + const pdfSize = outputFile.size; + if (pdfSize < 1) { + throw new Error(`PDF file-size for conversion ${outputPath} is smaller than 1 byte`) + } + logger.info(`created PDF output with size: ${pdfSize}`); }; diff --git a/src/index.ts b/src/index.ts index 16c4b64..4690c85 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,6 +1,6 @@ import { createServer } from './server.ts'; import { trapShutdown } from './shutdown.ts'; -const server = createServer(); +const server = await createServer(); trapShutdown(async () => server.stop()); diff --git a/src/logger.ts b/src/logger.ts index 5e31a8d..dfb9d75 100644 --- a/src/logger.ts +++ b/src/logger.ts @@ -1,7 +1,8 @@ -import pino, { type Logger as PinoLogger } from 'pino'; +import pino from 'pino'; export const loggerUsingPino = () => pino({ name: 'html-pdf-export', }); -export type Logger = () => PinoLogger; +export type PinoLogger = typeof pino; +export type LoggerFactory = () => PinoLogger; diff --git a/src/package.json b/src/package.json new file mode 100755 index 0000000..e69de29 diff --git a/src/server.ts b/src/server.ts index 59d5452..f2b0646 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,20 +2,58 @@ import { file } from 'bun'; import { nanoid } from 'nanoid'; import { mkdir, unlink } from 'node:fs/promises'; import { tmpdir } from 'os'; +import { ReadableStream } from "stream/web"; import { htmlToPdfClient, type HtmlToPdfClient } from './html-to-pdf-client.ts'; -import { type Logger, loggerUsingPino } from './logger.ts'; +import { type LoggerFactory, loggerUsingPino, PinoLogger } from './logger.ts'; export interface CreateServerOptions { port?: number; - logger?: Logger; + logger?: LoggerFactory; htmlToPdfClient?: HtmlToPdfClient; } -export const createServer = (options?: CreateServerOptions) => { +const convertHtmlToPdf = async( + requestBody: ReadableStream, + requestId: string, + tmpDir: string, + client: HtmlToPdfClient, + logger: PinoLogger +) => { + const outputPath = `${tmpDir}/${requestId}.pdf`; + const inputPath = `${tmpDir}/${requestId}.html`; + + logger.info(`Writing request body to file: ${inputPath}`); + await Bun.write(inputPath, await Bun.readableStreamToBlob(requestBody)); + + const startTime = process.hrtime(); + + await client(inputPath, outputPath, logger); + + const duration = process.hrtime(startTime); + logger.info(`Done converting HTML to PDF in ${duration}`); + + const pdfOutput = file(outputPath); + logger.info(`created output size: ${pdfOutput.size}`); + + pdfOutput.stream().getReader().closed.then(async() => { + await unlink(outputPath); + await unlink(inputPath); + }); + + return pdfOutput; +}; + +export const createServer = async(options?: CreateServerOptions) => { const port = options?.port ?? 8000; const logger = options?.logger?.() ?? loggerUsingPino(); const client = options?.htmlToPdfClient ?? htmlToPdfClient; + const tmpDir = process.env.HTML_PDF_EXPORT_TMPDIR ?? tmpdir(); + if (!(await file(tmpDir).exists())) { + logger.info('Temporary file directory not found, creating a new directory'); + await mkdir(tmpDir, {recursive: true}); + } + logger.info(`Listening on port ${port}...`); return Bun.serve({ @@ -44,23 +82,15 @@ export const createServer = (options?: CreateServerOptions) => { return new Response(null, {status: 400}); } - const tmpDir = process.env.HTML_PDF_EXPORT_TMPDIR ?? tmpdir(); - if (!(await file(tmpDir).exists())) { - logger.info('Temporary file directory not found, creating a new directory'); - await mkdir(tmpDir, {recursive: true}); - } + const pdfOutput = await convertHtmlToPdf( + req.body, + requestId, + tmpDir, + client, + logger, + ); - const outputPath = `${tmpDir}/${requestId}.pdf`; - const contentLength = req.headers.get('content-length'); - logger.info('Starting conversion of HTML to PDF', {contentLength}); - const startTime = process.hrtime(); - await client(req, outputPath); - const duration = process.hrtime(startTime); - logger.info('Done converting HTML to PDF', {contentLength, duration}); - - const output = file(outputPath); - output.stream().getReader().closed.then(() => unlink(outputPath)); - return new Response(output, {status: 200, headers: {'content-type': 'application/pdf'}}); + return new Response(pdfOutput, {status: 200, headers: {'content-type': 'application/pdf'}}); }, error(err) { logger.error(err); diff --git a/src/shutdown.ts b/src/shutdown.ts index d401506..ce79f39 100644 --- a/src/shutdown.ts +++ b/src/shutdown.ts @@ -1,13 +1,12 @@ import { sleep } from 'bun'; -import { type Logger as PinoLogger } from 'pino'; -import { type Logger, loggerUsingPino } from './logger.ts'; +import { type LoggerFactory, loggerUsingPino, PinoLogger } from './logger.ts'; class ShutdownTimedOutError extends Error { } interface ShutdownOptions { timeout?: number; - logger?: Logger; + logger?: LoggerFactory; } export function trapShutdown(callback: () => Promise, options?: ShutdownOptions) {