The purpose of this repository is to provide a way for people to generate random "placeholder text" -- with a Markov Chain.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

63 lines
2.0 KiB

module internal DataProcessing
open System.Text.RegularExpressions
open DataCleaning
open System
open System.IO
/// Tests whether the regular expression `pattern` matches anywhere in `text`.
/// Note the curried argument order (pattern first), which is the reverse of
/// `Regex.IsMatch`, so the function can be partially applied with a pattern.
let MatchText pattern text =
    Regex.IsMatch(input = text, pattern = pattern)
/// Joins the given words into one string, separating them with a single space.
let ConcatToString words =
    words |> String.concat " "
/// Validates that `gibberishLevel` lies in the inclusive range 2..20.
/// Raises `ArgumentException` (via `invalidArg`) when it is out of range.
/// NOTE(review): on success this returns the `ignore` function itself, not
/// unit; kept as-is because callers outside this file may rely on that type.
let GibberishLevelIsValid gibberishLevel =
    if gibberishLevel >= 2 && gibberishLevel <= 20 then
        ignore
    else
        invalidArg "gibberishLevel" "Invalid argument. Must be between 2 and 20 (inclusive)."
/// Validates that `sentences` is at least 1.
/// Raises `ArgumentException` (via `invalidArg`) for zero or negative values.
/// NOTE(review): on success this returns the `ignore` function itself, not
/// unit; kept as-is because callers outside this file may rely on that type.
let SentencesIsValid sentences =
    if sentences >= 1 then
        ignore
    else
        invalidArg "sentences" "Invalid argument. Must be greater than 0."
/// Validates a source file path.
/// The extension is checked first: anything other than ".txt" raises
/// `ArgumentException` (via `invalidArg`). Only then is existence checked;
/// a missing file raises `FileNotFoundException`.
/// NOTE(review): on success this returns the `ignore` function itself, not
/// unit; kept as-is because callers outside this file may rely on that type.
let FilePathIsValid filePath =
    if Path.GetExtension filePath <> ".txt" then
        invalidArg "filePath" "Invalid argument. File must be a .txt file."
    elif not (File.Exists filePath) then
        raise (FileNotFoundException("Unable to find the file at the location specified."))
    else
        ignore
/// Returns true when `text` contains at least one sentence-ending token
/// (".", "!" or "?") anywhere in the string; false otherwise.
let TextContainsValidEndToken (text: string) =
    [ "."; "!"; "?" ]
    |> List.exists (fun (endToken: string) -> text.Contains endToken)
/// Breaks `text` into words and yields every sliding window of `groupSize`
/// consecutive words (via `Seq.windowed`).
let SortIntoGroups groupSize text =
    // The pattern \s+ targets runs of whitespace; SplitText comes from
    // DataCleaning and is presumably a regex split - confirm there.
    let words = SplitText @"\s+" text
    Seq.windowed groupSize words
/// Splits an array of words into a (prefix, last) pair: every word except the
/// final one joined by single spaces, and the final word on its own.
/// An empty array is not supported; indexing the last element fails for it.
let BisectWords words =
    let length = Array.length words
    let start =
        // The pipeline needs an explicit source; without `words` here the
        // binding starts with `|>` and does not parse.
        words
        |> Seq.take (length - 1)
        |> ConcatToString
    (start, words.[length - 1])
/// Joins `prev` and `next` with a single space, dropping either one when it is
/// null, empty or whitespace-only, so no stray separator is produced.
let CombineWords prev next =
    let hasContent word = not (String.IsNullOrWhiteSpace word)
    let kept = List.filter hasContent [ prev; next ]
    ConcatToString kept
/// Normalises raw input text: passes it through `ReplaceArtifact` for double
/// quotes, the "\n\nIn" sequence, carriage returns and newlines, then splits
/// on whitespace runs and re-joins the pieces with single spaces.
/// NOTE(review): what each artifact is replaced with is decided by
/// `ReplaceArtifact` in DataCleaning - confirm there.
let ApplyStandardSetup text =
    // The pipeline needs an explicit source; without `text` here the body
    // starts with `|>` and does not parse.
    text
    |> ReplaceArtifact "\""
    // "\n\nIn" is handled before the bare "\n" step, which would otherwise
    // consume its newlines first.
    |> ReplaceArtifact "\n\nIn"
    |> ReplaceArtifact "\r"
    |> ReplaceArtifact "\n"
    |> SplitText @"\s+"
    |> ConcatToString