-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodetopdf.fsx
More file actions
509 lines (433 loc) · 16.7 KB
/
codetopdf.fsx
File metadata and controls
509 lines (433 loc) · 16.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
#r "nuget: MAB.DotIgnore"
#r "nuget: ceTe.DynamicPDF.Converter.NET"
#r "nuget: ceTe.DynamicPDF.CoreSuite.NET"
#r "nuget: ceTe.DynamicPDF.HtmlConverter.NET"
#r "nuget: Microsoft.Playwright"
#r "nuget: Jering.Web.SyntaxHighlighters.HighlightJS"
open System
open System.IO
open System.Text
open System.Threading
open System.Text.Json
open System.Diagnostics
open System.Collections.Generic
open System.Security.Cryptography
open System.Text.RegularExpressions
open MAB.DotIgnore
open ceTe.DynamicPDF.Conversion
open Microsoft.Playwright
open Jering.Web.SyntaxHighlighters.HighlightJS
type ResultBuilder() =
member inline _.Bind(x, f) = Result.bind f x
member inline _.Return(x) = Ok(x)
member inline _.ReturnFrom(x: Result<_, _>) = x
let result = ResultBuilder()
let highlight code language =
StaticHighlightJSService.HighlightAsync(code, language).Result
let fromSourceDir path =
Path.Combine(__SOURCE_DIRECTORY__, path)
let rec rmdir path attempts =
try
Directory.Delete(path, true)
with :? IOException ->
if attempts = 1 then
reraise ()
else
Thread.Sleep(1000)
rmdir path (attempts - 1)
let tryRmdir path attempts =
try
rmdir path attempts
Ok()
with (:? IOException | :? UnauthorizedAccessException) as ex ->
Error ex.Message
let md5 (string: string) =
use md5 = MD5.Create()
let bytes = Encoding.Default.GetBytes(string)
md5.ComputeHash(bytes) |> BitConverter.ToString |> _.Replace("-", "")
let generateFileId =
let fileIdMap = Dictionary()
fun filePath ->
match fileIdMap.TryGetValue(filePath) with
| true, id -> id
| false, _ ->
let value = $"file-{md5 filePath}"
fileIdMap[filePath] <- value
value
let execShellCmd entrypoint args =
use proc =
new Process(
StartInfo =
ProcessStartInfo(
FileName = entrypoint,
Arguments = args,
UseShellExecute = false,
CreateNoWindow = true
)
)
proc.Start() |> ignore
proc.WaitForExit()
let cloneRepo host repo =
printfn "Cloning repository: %s/%s ..." host repo
let tmpDir = Path.Combine(Directory.GetCurrentDirectory(), repo.Replace("/", "-"))
let clone () =
execShellCmd "git" $"clone https://{host}/{repo}.git {tmpDir}" |> ignore
try
if Path.Exists(tmpDir) then
rmdir tmpDir 3
clone ()
Ok tmpDir
with ex ->
Error ex.Message
type Tree<'n, 't> =
| Node of 'n * Tree<'n, 't> list
| Tip of 't
let rec flatten tree =
match tree with
| Node(_, children) -> children |> Seq.collect flatten
| Tip t -> Seq.singleton t
let rec generateDirectoryTrees excluded (dir: DirectoryInfo) =
dir.EnumerateFileSystemInfos()
|> Seq.choose (function
| :? DirectoryInfo as subdir when not <| excluded subdir.Name true ->
Some(generateDirectoryTree excluded subdir)
| :? FileInfo as file when not <| excluded file.Name false -> Some(Tip file)
| _ -> None)
|> Seq.toList
and generateDirectoryTree excluded dir =
Node(dir.Name, generateDirectoryTrees excluded dir)
let renderDirectoryTree maxFileSize (output: StringBuilder) trees =
let rec recurse prefix trees =
let count = List.length trees
trees
|> List.iteri (fun i tree ->
let isLast = i = count - 1
let connector = if isLast then "└── " else "├── "
match tree with
| Node(entry, children) ->
output.Append($"{prefix}{connector}{entry}/\n") |> ignore
recurse (prefix + (if isLast then " " else "| ")) children
| Tip(fileInfo: FileInfo) ->
let string =
if fileInfo.Length <= maxFileSize then
let fileId = generateFileId fileInfo.FullName
$"{prefix}{connector}<a href=\"#{fileId}\">{fileInfo.Name}</a>\n"
else
$"{prefix}{connector}%s{fileInfo.Name} (skip oversized file)\n"
output.Append(string) |> ignore)
recurse "" trees
let getLanguage (languageMap: IReadOnlyDictionary<string, string>) (ext: string) =
languageMap.GetValueOrDefault(ext[1..].ToLowerInvariant(), "text")
let loadLanguageMap path =
try
use stream = File.OpenRead(path)
JsonSerializer.Deserialize<Dictionary<string, string>>(stream).AsReadOnly()
|> Ok
with (:? IOException | :? JsonException) as ex ->
Error ex.Message
let loadStylesheet path =
try
Ok(File.ReadAllText(path))
with :? IOException as ex ->
Error ex.Message
let loadIgnoreFile path =
try
path |> File.ReadAllLines |> IgnoreList |> Ok
with :? IOException as ex ->
Error ex.Message
let mkExcluded (ignorePath: string option) =
match ignorePath with
| Some path ->
loadIgnoreFile path
|> Result.map (fun ignore path isDirectory -> ignore.IsIgnored(path, isDirectory, null) || path = ".git")
| None -> Ok(fun path _ -> path = ".git")
let generateHtml maxFileSize stylesheet excluded getLanguage dir title =
let result = StringBuilder()
let directoryTrees = generateDirectoryTrees excluded (DirectoryInfo(dir))
result
.Append("""<!DOCTYPE html><html><head><meta charset="UTF-8">""")
.Append($"<style>{stylesheet}</style>")
.Append("</head><body>")
.Append($"<h1>{title}</h1>")
.Append("""<h2>Directory Structure</h2><div class="directory-tree">""")
|> ignore
renderDirectoryTree maxFileSize result directoryTrees
result.Append("</div>").Append("<h2>Contents</h2>") |> ignore
directoryTrees
|> Seq.collect flatten
|> Seq.filter (fun file -> file.Length <= maxFileSize)
|> Seq.iter (fun file ->
use stream = file.OpenRead()
use reader = new StreamReader(stream)
let code = reader.ReadToEnd()
let fileId = generateFileId file.FullName
let language = getLanguage file.Extension
let highlightedCode = highlight code language
result
.Append("""<div class="file-container">""")
.Append($"""<h3 id="{fileId}">{file.FullName}</h3>""")
.Append($"""<pre><code class="hljs language-{language}">{highlightedCode}</code></pre>""")
.Append("</div>")
|> ignore)
result.Append("</body></html>") |> ignore
string result
type Input =
| Local of path: string
| Remote of host: string * repo: string * removeDir: bool
type Output =
| Html
| Pdf
| PdfUsingBrowser
type Args =
{ LanguageMapPath: string
StylesheetPath: string
IgnorePath: string option
Input: Input
Output: Output
OutFile: string option
MaxFileSize: int }
type Target =
| GetHelp
| Work of Args
type Reading =
| ReadingLanguageMap
| ReadingStylesheet
| ReadingIgnore
| ReadingOutFile
| ReadingMaxFileSize
type ArgsBuilder =
{ Argv: string list
Reading: Reading option
LanguageMapPath: string option
StylesheetPath: string option
RemoveClonedRepo: bool
IgnorePath: string option
Input: Input option
Output: Output option
OutFile: string option
MaxFileSize: int option }
let mkArgs builder : Result<Args, string> =
let mk input =
{ LanguageMapPath = builder.LanguageMapPath |> Option.defaultValue (fromSourceDir "extmap.json")
StylesheetPath = builder.StylesheetPath |> Option.defaultValue (fromSourceDir "styles.css")
IgnorePath = builder.IgnorePath
Input = input
Output = builder.Output |> Option.defaultValue Pdf
OutFile = builder.OutFile
MaxFileSize = builder.MaxFileSize |> Option.defaultValue (1024 * 1000) }
|> Ok
match builder.Input with
| None -> Error "expect input path"
| Some(Local _) when builder.RemoveClonedRepo -> Error "-u must come with a github repo"
| Some(Remote(host, repo, _)) -> mk (Remote(host, repo, builder.RemoveClonedRepo))
| Some input -> mk input
let localOrRemote =
let githubPatterns =
[ @"^https://([a-zA-Z\d\.]+)/([^/]+\/[^/]+)$"
@"^git@([a-zA-Z\d\.]+):([^/]+\/[^/]+)\.git$" ]
|> List.map Regex
fun string ->
let rec iter (githubPatterns: Regex list) =
match githubPatterns with
| pattern :: rest ->
let m = pattern.Match(string)
if m.Success then
Remote(m.Groups[1].Value, m.Groups[2].Value.Replace(".git", ""), false)
else
iter rest
| [] -> Local string
iter githubPatterns
let parseArgv argv =
let (|Output|_|) arg =
match arg with
| "-html" -> Some Html
| "-pdf" -> Some Pdf
| _ -> None
let rec iter builder : Result<Target, string> =
match builder.Argv with
| [] ->
match builder.Reading with
| Some _ -> Error "expect more arguments"
| None -> mkArgs builder |> Result.map Work
| arg :: rest ->
let builder = { builder with Argv = rest }
match builder.Reading with
| Some reading ->
let builder = { builder with Reading = None }
match reading with
| ReadingLanguageMap ->
iter
{ builder with
LanguageMapPath = Some arg }
| ReadingStylesheet ->
iter
{ builder with
StylesheetPath = Some arg }
| ReadingIgnore -> iter { builder with IgnorePath = Some arg }
| ReadingOutFile -> iter { builder with OutFile = Some arg }
| ReadingMaxFileSize ->
match Int32.TryParse(arg) with
| true, maxFileSize ->
iter
{ builder with
MaxFileSize = Some(maxFileSize * 1024) }
| false, _ -> Error "max file size must be an integer"
| None ->
match arg with
| Output format ->
match builder.Output with
| Some _ -> Error "output format specified twice"
| None -> iter { builder with Output = Some format }
| "-map" ->
match builder.LanguageMapPath with
| Some _ -> Error "language map path specified twice"
| None ->
iter
{ builder with
Reading = Some ReadingLanguageMap }
| "-style" ->
match builder.StylesheetPath with
| Some _ -> Error "stylesheet path specified twice"
| None ->
iter
{ builder with
Reading = Some ReadingStylesheet }
| "-o" ->
match builder.OutFile with
| Some _ -> Error "output filename specified twice"
| None ->
iter
{ builder with
Reading = Some ReadingOutFile }
| "-u" ->
if builder.RemoveClonedRepo then
Error "duplicate -u option"
else
iter { builder with RemoveClonedRepo = true }
| "-fsize" ->
match builder.MaxFileSize with
| Some _ -> Error "max file size specified twice"
| None ->
iter
{ builder with
Reading = Some ReadingMaxFileSize }
| "-b" ->
match builder.Output with
| Some Pdf
| None ->
iter
{ builder with
Output = Some PdfUsingBrowser }
| Some Html -> Error "-b mustn't be used with -html"
| Some PdfUsingBrowser -> Error "duplicate -b option"
| "-help" -> Ok GetHelp
| _ ->
match builder.Input with
| Some _ -> Error "intput argument specified twice"
| None ->
iter
{ builder with
Input = Some(localOrRemote arg) }
{ Argv = List.ofSeq argv
Reading = None
LanguageMapPath = None
StylesheetPath = None
RemoveClonedRepo = false
IgnorePath = None
Input = None
Output = None
OutFile = None
MaxFileSize = None }
|> iter
let findGitignore path =
if not <| Directory.Exists(path) then
Error $"path {path} does not exist"
else
let gitignorePath = Path.Combine(path, ".gitignore")
if File.Exists(gitignorePath) then
Ok(path, Some gitignorePath)
else
Ok(path, None)
// Returns .gitgnore path (if any)
let prepareDirectory input =
match input with
| Local path -> findGitignore path
| Remote(host, repo, _) -> cloneRepo host repo |> Result.bind findGitignore
let mkTitle input =
match input with
| Local path -> $"Local directory: {path}"
| Remote(host, repo, _) -> $"Remote: {host}/{repo}"
let save html outFile input output =
let barePath =
outFile
|> Option.defaultWith (fun () ->
match input with
| Local path -> path
| Remote(_, repo, _) -> repo.Replace("/", "-"))
try
match output with
| Html -> File.WriteAllText(barePath + ".html", html)
| Pdf ->
let options = HtmlConversionOptions(false)
let converter = HtmlConverter(html, options)
converter.Convert(barePath + ".pdf")
| PdfUsingBrowser ->
File.WriteAllText(barePath + ".html", html)
task {
use! playwright = Playwright.CreateAsync()
let! browser = playwright.Chromium.LaunchAsync(BrowserTypeLaunchOptions(Headless = true))
let! page = browser.NewPageAsync()
let! _ = page.GotoAsync($"""file:///{barePath}.html""")
let! _ = page.PdfAsync(PagePdfOptions(Path = barePath + ".pdf", Format = "A4"))
do! browser.CloseAsync()
return ()
}
|> _.Result
File.Delete(barePath + ".html")
Ok()
with ex ->
Error ex.Message
let runResult =
function
| Ok() -> ()
| Error message -> eprintfn "Error: %s" message
[<Literal>]
let help =
"""dotnet fsi codetopdf <local-path/remote-repo> [-h] [-u] [-html] [-pdf] [-b] [-map <path>] [-style <path>] [-ignore <path>] [-o <path-without-extension>] [-fsize <int>]
-local-path: a path
-github-repo: https://<host>/<owner>/<repo> / git@<host>:<owner>/<repo>.git
-help: display this message. omit any other option
-u: remove cloned repo after generation. used in conjunction with <remote-repo>
-html: generate html output
-pdf: generate pdf output, which is the default
-b: generate pdf using a browser
-map: specify a json file use to determine file's language from its extension. default to "extmap.json"
-style: specify a css file injected into the output. default to "styles.css"
-ignore: specify a .gitignore-like file to exclude unwanted files. default to directory's .gitignore (if it has)
-fsize: specify the max file size (KB). default to 1000KB.
-o: output the production file to path-without-extension + .html/.pdf"""
let work (args: Args) =
result {
let! languageMap = loadLanguageMap args.LanguageMapPath
let! stylesheet = loadStylesheet args.StylesheetPath
let! dir, gitignorePath = prepareDirectory args.Input
let! excluded = args.IgnorePath |> Option.orElse gitignorePath |> mkExcluded
let html =
generateHtml args.MaxFileSize stylesheet excluded (getLanguage languageMap) dir (mkTitle args.Input)
do! save html args.OutFile args.Input args.Output
do!
match args.Input with
| Remote(_, _, true) -> tryRmdir dir 3
| _ -> Ok()
printfn "Ok"
return ()
}
do
result {
match! parseArgv (Environment.GetCommandLineArgs()[2..]) with // Ignore "fsi codetopdf.fsx".
| Work args -> return! work args
| GetHelp ->
printfn "%s" help
return ()
}
|> runResult