The purpose of this repository is to provide a way for people to generate random "placeholder text" -- with a Markov Chain.
https://www.craigoates.net/Software/project/12
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
81 lines
3.6 KiB
81 lines
3.6 KiB
module internal DataServices |
|
|
|
open SystemServices |
|
open DataStructuring |
|
open DataProcessing |
|
open DataCleaning |
|
open System.Threading |
|
open System |
|
open DataAccess |
|
|
|
/// Returns true when `filePath` is equal to one of the entries in
/// `ListSampleFiles`, and false otherwise.
let CheckFileExists filePath =
    let isRequestedFile candidate = candidate = filePath
    Array.exists isRequestedFile ListSampleFiles
|
|
|
/// Recursively grows `chain` by walking `map` from `state`.
///
/// `map`   - lookup from a state (a run of words) to the words that may follow it.
/// `state` - the current key to look up in `map`.
/// `chain` - the words collected so far, newest first (each new word is
///           cons-ed onto the front, so callers must reverse the result).
///
/// The walk ends when the picked word matches the sentence-ending pattern
/// (a trailing '.', '?' or '!'), or when `state` has no entry in `map`.
let rec GenerateMarkovChain (map: Map<string, string List>) (state:string) chain =
    match Map.tryFind state map with
    | None ->
        // Dead end: no known follow-up for this state. Close the sentence with
        // a randomly picked key from the map and a full stop.
        let fallbackChoice = (PickRandomItem map).Key
        String.Format("{0}.", fallbackChoice) :: chain
    | Some candidates ->
        let nextChoice = candidates |> PickRandomItem
        let extendedChain = nextChoice :: chain
        if MatchText "(\.|\?|\!)$" nextChoice then
            extendedChain
        else
            // Slide the window: drop the first word of the current state and
            // combine what is left with the word that was just picked.
            let remainingWords =
                SplitText "\s+" state
                |> Seq.skip 1
                |> ConcatToString
            let nextState = CombineWords remainingWords nextChoice
            GenerateMarkovChain map nextState extendedChain
|
|
|
/// Builds one sentence from `map`, beginning with `start`.
/// `start` seeds the chain and is also its first entry. GenerateMarkovChain
/// returns the chain newest-first, so it is reversed before being joined.
let GenerateMarkovSentence map start =
    let newestFirst = GenerateMarkovChain map start [start]
    let inReadingOrder = List.rev newestFirst
    ConcatToString inReadingOrder
|
|
|
(* Note: Thread.Sleep (Random Number Generation) |
|
================================================================================================================== |
|
Thread.Sleep is needed to yield better random numbers. Without it, the same random number tends to be used.
|
When debugging (I.E. the observer effect?), enough time passes for a new random number to generate -- |
|
meaning a new seed or "start word" for each sentence. |
|
This isn't ideal but be careful when attempting to remove this line. |
|
|
|
Note: Extra Sentence Generation |
|
================================================================================================================== |
|
You might notice, from time to time, the for-loop produces one more sentence than specified. |
|
This happens when the starting point (startWords) contains a 'full sentence' within it. |
|
|
|
Example: |
|
-------- |
|
(Refer to code for in-depth breakdown of data types/structures.) |
|
1. Separate the 'start words' from 'map'.
|
[ |
|
(1)["This is a sentence." ["word 1"; "word 2"; Etc.]] |
|
(2)["This is another" ["word 1"; "word 2"; Etc.]] |
|
(3)["No full sentence" ["word 1"; "word 2"; Etc.]] |
|
] |
|
2. In this case, if (1) is selected, we already have a full sentence before we have entered the for-loop. |
|
3. Because the code has not entered the for-loop yet, the 'sentence count' is still at 0. |
|
4. When the code enters the loop, the 'start words' are used to generate the 'FIRST' sentence of the loop. |
|
5. The 'start words' are then appended on to the sentence to complete the generated sentence. |
|
6. Now we have two sentences but the loop has not completed its cycle, so it creates what is now an extra one.
|
|
|
As this NuGet package is about generating random text, this should not be a problem for the most part. |
|
With that said, when this happens, it is an incorrect result -- from the end-user's point of view.
|
So, to get a more accurate result, pass the result of this function through to the RemoveArtifactSentences function. |
|
*) |
|
/// Generates `noOfSentences` sentences from `map` and joins them into one string.
/// Every sentence starts from a randomly picked entry of the start words
/// (the first component of `SeperateStartWords map`).
/// The output may hold one sentence more than requested when a picked start
/// entry already contains a full sentence; RemoveArtifactSentences trims that.
let GenerateMarkovText noOfSentences map =
    let startWords, _ = SeperateStartWords map
    let buildSentence _ =
        // Deliberate pause before each pick: without it the same random start
        // word tends to be chosen repeatedly. Be careful when removing it.
        Thread.Sleep(100)
        GenerateMarkovSentence map (PickRandomItem startWords).Key
    seq { 1 .. noOfSentences }
    |> Seq.map buildSentence
    |> ConcatToString
|
|
|
/// Trims `text` to at most `noOfSentences` sentences.
///
/// `noOfSentences` - the maximum number of sentences to keep.
/// `text`          - generated text, e.g. the output of GenerateMarkovText,
///                   which can contain one sentence more than was requested.
///
/// The text is split after every '.', '!' or '?' that is followed by a
/// whitespace character. The leading pieces are kept and re-joined, and the
/// result is passed through `ReplaceArtifact " "`.
let RemoveArtifactSentences noOfSentences text =
    text
    |> SplitText @"(?<=[\.\!\?]\s)"
    // Array.truncate rather than Array.take: take throws when the text holds
    // fewer pieces than requested (or the count is negative); truncate simply
    // returns what is available, and is identical whenever take would succeed.
    |> Array.truncate noOfSentences
    |> ConcatToString
    |> ReplaceArtifact " "