|
|
|
module internal DataServices
|
|
|
|
|
|
|
|
open SystemServices
|
|
|
|
open DataStructuring
|
|
|
|
open DataProcessing
|
|
|
|
open DataCleaning
|
|
|
|
open System.Threading
|
|
|
|
open System
|
|
|
|
open DataAccess
|
|
|
|
|
|
|
|
/// Returns true when `filePath` is equal to one of the entries in `ListSampleFiles`.
/// The check is a membership test against that array only.
let CheckFileExists filePath =
    ListSampleFiles
    |> Array.exists (fun knownFile -> knownFile = filePath)
|
|
|
|
|
|
|
|
/// Extends `chain` (built in reverse order, newest item first) by walking `map`
/// from `state` until a terminating item is reached.
///
/// - `map`   : lookup from a state string to the list of items that may follow it.
/// - `state` : the current lookup key.
/// - `chain` : the items accumulated so far, most recent first.
///
/// Returns the accumulated chain, still in reverse order.
let rec GenerateMarkovChain (map: Map<string, string List>) (state:string) chain =
    match Map.tryFind state map with
    | None ->
        // Unknown state: close the chain with a randomly picked key from the map,
        // followed by a full stop.
        let randomEntry = PickRandomItem map
        String.Format("{0}.", randomEntry.Key) :: chain
    | Some candidates ->
        let picked = PickRandomItem candidates
        let extended = picked :: chain
        // An item ending in '.', '?' or '!' finishes the chain.
        if MatchText @"(\.|\?|\!)$" picked then
            extended
        else
            // Drop the first whitespace-separated piece of the state, then combine
            // the remainder with the picked item to form the next state.
            let trailingWords =
                SplitText @"\s+" state
                |> Seq.skip 1
                |> ConcatToString
            GenerateMarkovChain map (CombineWords trailingWords picked) extended
|
|
|
|
|
|
|
|
/// Builds one piece of text beginning with `start`: runs `GenerateMarkovChain`
/// seeded with `[start]`, restores the chain to forward order and concatenates it.
let GenerateMarkovSentence map start =
    // The chain comes back newest-first, so it has to be reversed before joining.
    let reversedChain = GenerateMarkovChain map start [start]
    List.rev reversedChain
    |> ConcatToString
|
|
|
|
|
|
|
|
(* Note: Thread.Sleep (Random Number Generation)
|
|
|
|
==================================================================================================================
|
|
|
|
Thread.Sleep is needed to yield better random numbers. Without it, the same random number tends to be used.
|
|
|
|
When debugging (I.E. the observer effect?), enough time passes for a new random number to generate --
|
|
|
|
meaning a new seed or "start word" for each sentence.
|
|
|
|
This isn't ideal but be careful when attempting to remove this line.
|
|
|
|
|
|
|
|
Note: Extra Sentence Generation
|
|
|
|
==================================================================================================================
|
|
|
|
You might notice, from time to time, the for-loop produces one more sentence than specified.
|
|
|
|
This happens when the starting point (startWords) contains a 'full sentence' within it.
|
|
|
|
|
|
|
|
Example:
|
|
|
|
--------
|
|
|
|
(Refer to code for in-depth breakdown of data types/structures.)
|
|
|
|
1. Separate the 'start words' from 'map' (via SeperateStartWords).
|
|
|
|
[
|
|
|
|
(1)["This is a sentence." ["word 1"; "word 2"; Etc.]]
|
|
|
|
(2)["This is another" ["word 1"; "word 2"; Etc.]]
|
|
|
|
(3)["No full sentence" ["word 1"; "word 2"; Etc.]]
|
|
|
|
]
|
|
|
|
2. In this case, if (1) is selected, we already have a full sentence before we have entered the for-loop.
|
|
|
|
3. Because the code has not entered the for-loop yet, the 'sentence count' is still at 0.
|
|
|
|
4. When the code enters the loop, the 'start words' are used to generate the 'FIRST' sentence of the loop.
|
|
|
|
5. The 'start words' are then appended on to the sentence to complete the generated sentence.
|
|
|
|
6. Now we have two sentences, but the loop has not completed its cycle, so it creates what is now an extra one.
|
|
|
|
|
|
|
|
As this NuGet package is about generating random text, this should not be a problem for the most part.
|
|
|
|
With that said, when this happens, it is an incorrect result -- from the end-user's point of view.
|
|
|
|
So, to get a more accurate result, pass the result of this function through to the RemoveArtefactSentences function.
|
|
|
|
*)
|
|
|
|
/// Generates `noOfSentences` pieces of text from `map` and concatenates them.
/// Each piece starts from a randomly picked entry of the first component
/// returned by `SeperateStartWords map`.
let GenerateMarkovText noOfSentences map =
    let startWords, _ = SeperateStartWords map
    seq {
        for _sentenceIndex in 1 .. noOfSentences do
            // Deliberate pause before every pick; the file's note on Thread.Sleep
            // explains that removing it tends to repeat the same random choice.
            Thread.Sleep(100)
            let seed = PickRandomItem startWords
            yield GenerateMarkovSentence map seed.Key
    }
    |> ConcatToString
|
|
|
|
|
|
|
|
(* Note: RemoveArtefactSentences Breakdown
|
|
|
|
==================================================================================================================
|
|
|
|
This function receives a string and an int as its parameters.
|
|
|
|
The text being the string and the int being the total of intended sentences.
|
|
|
|
When using the GenerateMarkovText function, it can sometimes produce an unexpected extra sentence.
|
|
|
|
(See the GenerateMarkovText-comment above for further details.)
|
|
|
|
For the most part, the outcome of this "mishap" is fine (this is a random word generator, not a space rocket).
|
|
|
|
With that said, run the generated text through this function if you want "clean" results.
|
|
|
|
It takes longer to process the results, but it reduces the chance of incorrect output.
|
|
|
|
("Incorrect" from the end-user's point of view.)
|
|
|
|
The ReplaceArtifact function counteracts the effects of the text splitting.
|
|
|
|
SplitText splits the text and adds a space at the split.
|
|
|
|
This leads to the text having double-spaces when concatenated back together.
|
|
|
|
*)
|
|
|
|
/// Keeps at most the first `noOfSentences` sentences of `text`.
///
/// - `noOfSentences` : the maximum number of sentences to keep.
/// - `text`          : the text to trim.
///
/// The text is split after every '.', '!' or '?' that is followed by a whitespace
/// character, the leading pieces are joined back together, the result is passed
/// through `ReplaceArtifact " "`, and trailing spaces are removed.
/// When `text` holds fewer sentences than requested, all of them are returned.
let RemoveArtefactSentences noOfSentences text =
    text
    |> SplitText @"(?<=[\.\!\?]\s)"
    // Array.truncate (unlike Array.take) does not throw when the split yields
    // fewer pieces than noOfSentences; it returns what is available.
    |> Array.truncate noOfSentences
    |> ConcatToString
    // NOTE(review): per the file's note this undoes spacing introduced by the
    // split/concatenate round trip - confirm against ReplaceArtifact.
    |> ReplaceArtifact " "
    |> TrimEnd [|' '|]
|