The purpose of this repository is to provide a way for people to generate random "placeholder text" -- with a Markov Chain.
https://www.craigoates.net/Software/project/12
You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
63 lines
2.0 KiB
63 lines
2.0 KiB
module internal DataProcessing |
|
|
|
open System.Text.RegularExpressions |
|
open DataCleaning |
|
open System |
|
open System.IO |
|
|
|
/// Reports whether the regular expression `pattern` finds a match anywhere in `text`.
/// Note the argument order: the pattern comes first, the text to search second.
let MatchText (pattern: string) (text: string) =
    Regex.IsMatch(text, pattern)
|
|
|
/// Joins the given words into one string, placing a single space between each pair.
let ConcatToString words =
    words |> String.concat " "
|
|
|
/// Validates that `gibberishLevel` lies between 2 and 20 (inclusive).
/// Raises an ArgumentException (via `invalidArg`) when it does not.
/// When the level is in range the result is the `ignore` function itself
/// (not unit), so callers are expected to discard the returned value.
let GibberishLevelIsValid gibberishLevel =
    if gibberishLevel < 2 || gibberishLevel > 20 then
        invalidArg "gibberishLevel" "Invalid argument. Must be between 2 and 20 (inclusive)."
    else
        ignore
|
|
|
/// Validates that `sentences` is at least 1.
/// Raises an ArgumentException (via `invalidArg`) when it is less than 1.
/// When the count is acceptable the result is the `ignore` function itself
/// (not unit), so callers are expected to discard the returned value.
let SentencesIsValid sentences =
    if sentences < 1 then
        invalidArg "sentences" "Invalid argument. Must be greater than 0."
    else
        ignore
|
|
|
/// Validates a path to a text file.
/// The checks run in this order:
///   1. the extension must be exactly ".txt", otherwise an ArgumentException is raised;
///   2. the file must exist, otherwise a FileNotFoundException is raised.
/// When both checks pass the result is the `ignore` function itself (not unit),
/// so callers are expected to discard the returned value.
let FilePathIsValid filePath =
    if Path.GetExtension filePath <> ".txt" then
        invalidArg "filePath" "Invalid argument. File must be a .txt file."
    elif not (File.Exists filePath) then
        raise (FileNotFoundException("Unable to find the file at the location specified."))
    else
        ignore
|
|
|
/// Returns true when `text` contains at least one of the end tokens
/// ".", "!" or "?" anywhere within it; otherwise false.
let TextContainsValidEndToken (text: string) =
    [ "."; "!"; "?" ]
    |> List.exists (fun (token: string) -> text.Contains token)
|
|
|
/// Splits `text` with `SplitText` using the pattern `\s+` (runs of whitespace)
/// and then produces sliding windows of `groupSize` items over the result.
let SortIntoGroups groupSize text =
    text
    |> SplitText @"\s+"
    |> Seq.windowed groupSize
|
|
|
/// Splits an array of words into a pair: every word except the last one
/// joined through `ConcatToString`, and the last word on its own.
let BisectWords words =
    let lastIndex = Array.length words - 1
    // Everything before the final word becomes the first half of the pair.
    let leading = ConcatToString (Seq.take lastIndex words)
    leading, words.[lastIndex]
|
|
|
/// Joins `prev` and `next` through `ConcatToString`, leaving out whichever
/// of the two is null, empty or whitespace-only.
let CombineWords prev next =
    let hasContent word = not (String.IsNullOrWhiteSpace word)
    ConcatToString (List.filter hasContent [ prev; next ])
|
|
|
/// Runs the standard clean-up over `text`:
///   1. passes it through `ReplaceArtifact` once per artifact, in the listed order;
///   2. splits the result with `SplitText` on the pattern `\s+`;
///   3. joins the pieces back together with `ConcatToString`.
let ApplyStandardSetup text =
    // Order matters: "\n\nIn" has to be handled before the bare "\n" entry.
    let artifacts = [ "\""; "\n\nIn"; "\r"; "\n" ]
    let cleaned =
        artifacts
        |> List.fold (fun current artifact -> ReplaceArtifact artifact current) text
    cleaned
    |> SplitText @"\s+"
    |> ConcatToString