The purpose of this repository is to provide a way for people to generate random "placeholder text" -- with a Markov Chain.
https://www.craigoates.net/Software/project/12
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
233 lines
8.7 KiB
233 lines
8.7 KiB
// Learn more about F# at http://fsharp.org
// See the 'F# Tutorial' project for more help.
|
|
|
#load "SystemServices.fs" |
|
#load "DataAccess.fs" |
|
#load "DataCleaning.fs" |
|
#load "DataProcessing.fs" |
|
#load "DataStructuring.fs" |
|
#load "DataServices.fs" |
|
#load "ProductServices.fs" |
|
|
|
open SystemServices |
|
open DataAccess |
|
open DataCleaning |
|
open DataProcessing |
|
open DataStructuring |
|
open DataServices |
|
open WetPancake |
|
open System |
|
open System.IO |
|
|
|
// Template Files
// Absolute paths to the sample template files kept in the "TextFiles" folder next to this script.
// Each path is a compile-time [<Literal>], which is why it is built with "+" on
// __SOURCE_DIRECTORY__ rather than with Path.Combine (a method call is not a constant expression).
// Forward slashes are used as the separator: .NET accepts "/" on Windows as well as on
// Linux/macOS, whereas "\" is only treated as a directory separator on Windows.
[<Literal>]
let DesktopClock = __SOURCE_DIRECTORY__ + "/TextFiles/desktop-clock-info.txt"

[<Literal>]
let ConsoleWaterworks = __SOURCE_DIRECTORY__ + "/TextFiles/console-waterworks-announcement.txt"

[<Literal>]
let WordGenerator = __SOURCE_DIRECTORY__ + "/TextFiles/word-generator.txt"

[<Literal>]
let TestPost = __SOURCE_DIRECTORY__ + "/TextFiles/test-post.txt"
|
|
|
// System Services
// Scratch bindings that exercise PickRandomNumber and PickRandomItem
// (presumably defined in SystemServices.fs -- confirm against that file).

// Value returned by PickRandomNumber when it is given 10.
let ss_number = PickRandomNumber 10

// Item chosen by PickRandomItem from the sequence 1 .. 10.
let ss_item = seq { 1 .. 10 } |> PickRandomItem

// Item chosen from a sequence that holds a single "."; the sequence length is printed first.
let ss_items2 =
    let dots = seq { for _ in 0 .. 0 -> "." }
    dots |> Seq.length |> printfn "ss_items2 length is %i"
    PickRandomItem dots
|
|
|
// Data Access
// Scratch bindings that walk through the sample-file helpers one step at a time.

// Whatever CreateSampleTextFilesPath evaluates to (bound here without applying any argument).
let da_sampleFilePath = CreateSampleTextFilesPath

// Result of handing the binding above to FindSampleTextFiles.
let da_sampleTextFiles = da_sampleFilePath |> FindSampleTextFiles

// Whatever ListSampleFiles evaluates to (bound here without applying any argument).
let da_sampleFiles = ListSampleFiles

// Result of calling SelectRandomSampleFile with unit.
let da_randomSampleFile = SelectRandomSampleFile ()

// Result of loading the file selected above.
let da_file = da_randomSampleFile |> LoadFile
|
|
|
// Data Cleaning
// Each dc_replaceN feeds ReplaceArtifact one artifact pattern and a sample string containing it.
let dc_replace1 = "dc_repl1 \" end." |> ReplaceArtifact "\""
let dc_replace2 = "dc_repl2 \n\nIn end." |> ReplaceArtifact "\n\nIn"
let dc_replace3 = "dc_repl3 \r end." |> ReplaceArtifact "\r"
let dc_replace4 = "dc_repl4 \n end." |> ReplaceArtifact "\n"

// Runs all four replacements in sequence over one string that contains every artifact.
let dc_replace5 =
    let sample = "a1 \" a2 \n\nIn a3 \r a4 \n end."
    let withoutQuotes = ReplaceArtifact "\"" sample
    let withoutParagraphBreak = ReplaceArtifact "\n\nIn" withoutQuotes
    let withoutCarriageReturns = ReplaceArtifact "\r" withoutParagraphBreak
    ReplaceArtifact "\n" withoutCarriageReturns

// Result of SplitText given the whitespace pattern and a short sentence.
let dc_split = "This is a test string." |> SplitText @"\s+"
|
|
|
// Data Processing
// MatchText probes: a leading-capital pattern and a full-stop pattern, each tried against one
// input that contains a match and one that does not.
let dp_isStart = "This is a test" |> MatchText @"^[A-Z]"
let dp_isEnd = "and the end is nigh." |> MatchText @"\."
let dp_failStart = "somewhere in the middle" |> MatchText @"^[A-Z]"
let dp_failEnd = "this is not the end" |> MatchText @"\."

// Result of ConcatToString for a small list of words.
let dp_words = ConcatToString ["This"; "is"; "a"; "test"; "."]

// Result of SortIntoGroups (given 2 and the sample sentence), materialised as a list.
let dp_words2 =
    "This is a test. And has serveral words in it."
    |> SortIntoGroups 2
    |> List.ofSeq

// Result of BisectWords for an array of words.
let dp_bisect =
    BisectWords [|"This"; "is"; "a"; "test"; "."; "Contains"; "text"; "."|]

// CombineWords probes: two ordinary strings, then a whitespace-only first argument,
// an empty second argument and a null second argument.
let dp_combine = CombineWords "This is the previous" "this is the next"
let dp_combine2 = CombineWords " " "Prev is whitespace"
let dp_combine3 = CombineWords "Next is empty" ""
let dp_combine4 = CombineWords "Next is null" null
|
// Calls GibberishLevelIsValid with the value below and discards its result.
// Evaluates to the ArgumentException message if one is raised, otherwise to "No exception thrown".
let dp_gibberishException =
    try
        ignore (GibberishLevelIsValid 0) // enter a number here
        "No exception thrown"
    with
    | :? ArgumentException as argEx -> argEx.Message

// Same probe as above, for SentencesIsValid.
let dp_sentencesException =
    try
        ignore (SentencesIsValid 0) // enter a number here
        "No exception thrown"
    with
    | :? ArgumentException as argEx -> argEx.Message

// Calls FilePathIsValid with the path below and discards its result.
// Evaluates to the message of an ArgumentException or FileNotFoundException if either is
// raised, otherwise to "No exception thrown".
let dp_filePathIsValidException =
    // Alternative inputs, kept here so they can be swapped into the call below.
    let invalidPath = "C:/notvalid/test.txt"
    let invalidFileType = "C:/notvalid/test.doc"
    // Template file paths by the open & load declarations
    try
        ignore (FilePathIsValid ConsoleWaterworks) // enter filepath here
        "No exception thrown"
    with
    | :? ArgumentException as argEx -> argEx.Message
    | :? FileNotFoundException as missingEx -> missingEx.Message
|
|
|
// Sample text handed to TextContainsValidEndToken below.
let dp_sentences = "This is a test setence." // Edit this to test (valid end token)

// Re-evaluates the end-token check against dp_sentences each time it is called.
let dp_validEndToken () = TextContainsValidEndToken dp_sentences

dp_validEndToken () |> printfn "Valid End Token: %b"
|
|
|
// Data Structuring
// Empty map used as the starting point for every construction below.
let ds_map = Map.empty

let ds_text = [|"This"; "is"; "a"; "test"; "string."|]

// Bisects ds_text and passes both halves of the resulting pair to UpdateMap along with ds_map.
let ds_map2 =
    let (firstPart, secondPart) = BisectWords ds_text
    UpdateMap ds_map firstPart secondPart

// Result of ConstructMap for the empty map and ds_text.
let ds_map3 = ds_text |> ConstructMap ds_map

// Four word arrays; the first two are identical and the last two have no trailing full stop.
let ds_text2 =
    [ [|"This"; "is"; "the"; "first"; "test"; "string."|]
      [|"This"; "is"; "the"; "first"; "test"; "string."|]
      [|"this"; "is"; "the"; "second"; "test"; "string"|]
      [|"this"; "is"; "the"; "third"; "test"; "string"|] ]

// Result of ApplyStandardSetup for a string holding a "\r", quotes, "\n\nIn" and a trailing "\n".
let ds_text3 =
    ApplyStandardSetup "This is\r a test \"string\" for\n\nIn ds_text 3\n"

// ConstructMap applied to the second array only.
let ds_map4 = ds_text2.[1] |> ConstructMap ds_map

// ConstructMap applied to every array, each starting from the empty map.
let ds_map5 = ds_text2 |> List.map (ConstructMap ds_map)

// Every element of every entry in ds_map5, flattened into a single list.
let ds_map6 =
    ds_map5
    |> List.collect (fun entries -> [ for entry in entries -> entry ])

// SeperateStartWords applied to every entry in ds_map5.
let ds_map7 = ds_map5 |> List.map SeperateStartWords

// First element of ds_map7.
let ds_map7Item = ds_map7.[0]
|
|
|
// Data Services
// Selects a sample file, loads it, and pushes the result through ApplyStandardSetup,
// SortIntoGroups (with 2) and GenerateMap.
let dss_text =
    SelectRandomSampleFile ()
    |> LoadFile
    |> ApplyStandardSetup
    |> SortIntoGroups 2
    |> GenerateMap

// Result of SeperateStartWords for the structure built above.
let dss_startwords = dss_text |> SeperateStartWords

// Result of GenerateMarkovText given 2 and the structure built above.
let dss_sentences = dss_text |> GenerateMarkovText 2

dss_sentences |> printfn "Text: %A"

// Fixed five-sentence input for the RemoveArtefactSentences probe.
let dss_sentences2 = "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"

let dss_clean = RemoveArtefactSentences 6 dss_sentences2 // Change the no. of sentences to test.

dss_clean |> printfn "CLEANED TEXT: %s"
|
|
|
|
|
// Product Services (Pancake)
// Async request for all template files; it is run synchronously on the next line.
let ps_files = Pancake.RequestAllTemplateFilesAsync ()

Async.RunSynchronously ps_files
|
|
|
(* Basic Text Generation
======================================================================================================================
The requests in this section return text that has NOT been through the "cleaning" process.
As a result, on the (very) odd occasion the output contains one extra sentence.
As a general rule, this tends to happen at the higher gibberish-level values
(high gibberish-level values generate "more" coherent sentences).
The cause is a start-word that is itself a sentence, picked before the text generation loop has started.
For more information, see the GenerateMarkovText function in DataServices.fs.
The reason to use these functions over the "Clean Text Generation" functions below is that these finish faster.
If you can tolerate the occasional extra bit of text and the functions below are too slow, use these functions.
*)
let ps_result1 = Pancake.RequestRandomTextAsync ()
let ps_result2 = Pancake.RequestTextAsync 5 10
let ps_result3 = Pancake.RequestTextFromFileAsync 3 10 ConsoleWaterworks
let ps_result4 = Pancake.RequestTextFromFileAsync 3 10 DesktopClock
let ps_result5 = Pancake.RequestTextFromFileAsync 3 10 WordGenerator
let ps_result6 = Pancake.RequestTextFromFileAsync 3 10 TestPost

// Run each request to completion, one at a time.
Async.RunSynchronously ps_result1
Async.RunSynchronously ps_result2
Async.RunSynchronously ps_result3
Async.RunSynchronously ps_result4
Async.RunSynchronously ps_result5
Async.RunSynchronously ps_result6
|
|
|
(* Pancake.CleanResultAsync is public so that users can clean text by hand.
For the most part, it should not need to be called directly. *)
let ps_cleanText =
    // To clean generated text instead of the fixed sample, replace the literal with:
    // ps_result6 // Change this value for one of the above (ps_result1-6)
    //|> Async.RunSynchronously
    let uncleanText = "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"
    let cleaning = Pancake.CleanResultAsync 6 uncleanText // This value must not go above the one declared above (ps_resultX)
    Async.RunSynchronously cleaning

ps_cleanText |> printfn "CLEANED TEXT RESULT: %s"
|
|
|
(* Clean Text Generation
======================================================================================================================
The requests in this section return text that HAS been through the "cleaning" process.
The results go through an extra step that removes any erroneous sentences,
unlike the "Basic Text Generation" functions above.
The trade-off is that these "cleaning" functions can be slower to finish executing.
Use them when the result must match the requested number of sentences exactly
and the extra processing time is acceptable.
*)
let ps_cleanResult1 = Pancake.RequestCleanTextAsync 5 10
let ps_cleanResult2 = Pancake.RequestCleanTextFromFileAsync 3 10 ConsoleWaterworks
let ps_cleanResult3 = Pancake.RequestCleanTextFromFileAsync 3 10 DesktopClock
let ps_cleanResult4 = Pancake.RequestCleanTextFromFileAsync 3 10 WordGenerator
let ps_cleanResult5 = Pancake.RequestCleanTextFromFileAsync 3 10 TestPost

// Run each request to completion, one at a time.
Async.RunSynchronously ps_cleanResult1
Async.RunSynchronously ps_cleanResult2
Async.RunSynchronously ps_cleanResult3
Async.RunSynchronously ps_cleanResult4
Async.RunSynchronously ps_cleanResult5

let ps_cleanText2 =
    // Change the number for printing result below (E.G. ps_cleanResult1, ps_cleanResult4...)
    Async.RunSynchronously ps_cleanResult1

ps_cleanText2 |> printfn "CLEAN TEXT RESULT: %s"