The purpose of this repository is to provide a way for people to generate random "placeholder text" using a Markov chain. https://www.craigoates.net/Software/project/12
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

233 lines
8.7 KiB

// Learn more about F# at http://fsharp.org
// See the 'F# Tutorial' project for more help.
#load "SystemServices.fs"
#load "DataAccess.fs"
#load "DataCleaning.fs"
#load "DataProcessing.fs"
#load "DataStructuring.fs"
#load "DataServices.fs"
#load "ProductServices.fs"
open SystemServices
open DataAccess
open DataCleaning
open DataProcessing
open DataStructuring
open DataServices
open WetPancake
open System
open System.IO
// Template Files
// Compile-time string constants holding the paths of the sample text files.
// Each one is __SOURCE_DIRECTORY__ (the folder containing this script) joined to a
// file under its "TextFiles" sub-folder. The separators are backslashes, i.e. Windows-style paths.
[<Literal>]
let DesktopClock = __SOURCE_DIRECTORY__ + @"\TextFiles\desktop-clock-info.txt"
[<Literal>]
let ConsoleWaterworks = __SOURCE_DIRECTORY__ + @"\TextFiles\console-waterworks-announcement.txt"
[<Literal>]
let WordGenerator = __SOURCE_DIRECTORY__ + @"\TextFiles\word-generator.txt"
[<Literal>]
let TestPost = __SOURCE_DIRECTORY__ + @"\TextFiles\test-post.txt"
// System Services
// Scratch bindings that exercise PickRandomNumber and PickRandomItem.

// Asks PickRandomNumber for a number, passing 10 as its argument.
let ss_number = 10 |> PickRandomNumber

// Picks one item out of the integers 1 to 10.
let ss_item = seq { 1 .. 10 } |> PickRandomItem

// Picks from a sequence that holds exactly one element ("."),
// printing the sequence length first so the single-item case is visible.
let ss_items2 =
    let singleDot = seq { for idx in 0 .. 0 -> "." }
    singleDot |> Seq.length |> printfn "ss_items2 length is %i"
    singleDot |> PickRandomItem
// Data Access
// Scratch bindings that walk through the DataAccess helpers in order.
// NOTE(review): CreateSampleTextFilesPath and ListSampleFiles are referenced without
// arguments; whether they are plain values or functions is not visible from this script.
let da_sampleFilePath = CreateSampleTextFilesPath
let da_sampleTextFiles = da_sampleFilePath |> FindSampleTextFiles
let da_sampleFiles = ListSampleFiles
let da_randomSampleFile = SelectRandomSampleFile ()
// Loads whichever file was selected on the line above.
let da_file = da_randomSampleFile |> LoadFile
// Data Cleaning
// Each dc_replaceN binding feeds ReplaceArtifact one pattern and a sample string
// containing that pattern (presumably the artefact to strip -- confirm in DataCleaning.fs).
let dc_replace1 = "dc_repl1 \" end." |> ReplaceArtifact "\""
let dc_replace2 = "dc_repl2 \n\nIn end." |> ReplaceArtifact "\n\nIn"
let dc_replace3 = "dc_repl3 \r end." |> ReplaceArtifact "\r"
let dc_replace4 = "dc_repl4 \n end." |> ReplaceArtifact "\n"

// Runs all four replacements, in the same order as above, over a single string.
let dc_replace5 =
    let replaceAll =
        ReplaceArtifact "\""
        >> ReplaceArtifact "\n\nIn"
        >> ReplaceArtifact "\r"
        >> ReplaceArtifact "\n"
    replaceAll "a1 \" a2 \n\nIn a3 \r a4 \n end."

// Splits a sample sentence using the whitespace pattern \s+.
let dc_split = "This is a test string." |> SplitText @"\s+"
// Data Processing
// MatchText probes: the first two inputs contain what their pattern looks for
// (a leading capital letter, a full stop); the two "fail" inputs do not.
let dp_isStart = "This is a test" |> MatchText @"^[A-Z]"
let dp_isEnd = "and the end is nigh." |> MatchText @"\."
let dp_failStart = "somewhere in the middle" |> MatchText @"^[A-Z]"
let dp_failEnd = "this is not the end" |> MatchText @"\."
// Passes a small list of tokens to ConcatToString.
let dp_words = ConcatToString [ "This"; "is"; "a"; "test"; "." ]

// Passes a two-sentence string to SortIntoGroups with 2 as the first argument,
// then forces the result into a list so it is fully evaluated.
let dp_words2 =
    "This is a test. And has serveral words in it."
    |> SortIntoGroups 2
    |> Seq.toList

// Passes an eight-token array to BisectWords.
let dp_bisect =
    [| "This"; "is"; "a"; "test"; "."; "Contains"; "text"; "." |]
    |> BisectWords
// CombineWords probes. Arguments are given as (previous, next) in every case.

// Both arguments are ordinary text.
let dp_combine = CombineWords "This is the previous" "this is the next"

// The previous text is a single space.
let dp_combine2 = CombineWords " " "Prev is whitespace"

// The next text is the empty string.
let dp_combine3 = CombineWords "Next is empty" ""

// The next text is null.
let dp_combine4 = CombineWords "Next is null" null
// Calls GibberishLevelIsValid and reports the outcome as a string:
// the ArgumentException message if one is raised, otherwise a fixed "no exception" note.
let dp_gibberishException =
    try
        ignore (GibberishLevelIsValid 0) // enter a number here
        "No exception thrown"
    with
    | :? ArgumentException as argEx -> argEx.Message

// Same probe as above, but for SentencesIsValid.
let dp_sentencesException =
    try
        ignore (SentencesIsValid 0) // enter a number here
        "No exception thrown"
    with
    | :? ArgumentException as argEx -> argEx.Message
// Calls FilePathIsValid and reports the outcome as a string: the exception message when an
// ArgumentException or FileNotFoundException is raised, otherwise a fixed "no exception" note.
let dp_filePathIsValidException =
    // Candidate inputs to paste into the call below. The template file path constants
    // (DesktopClock, ConsoleWaterworks, ...) are declared after the #load/open lines.
    let invalidPath = "C:/notvalid/test.txt"
    let invalidFileType = "C:/notvalid/test.doc"
    try
        ignore (FilePathIsValid ConsoleWaterworks) // enter filepath here
        "No exception thrown"
    with
    | :? FileNotFoundException as missingEx -> missingEx.Message
    | :? ArgumentException as argEx -> argEx.Message
// Sample text handed to TextContainsValidEndToken.
let dp_sentences = "This is a test setence." // Edit this to test (valid end token)

// Re-evaluates the check against dp_sentences each time it is called.
let dp_validEndToken () = TextContainsValidEndToken dp_sentences

dp_validEndToken () |> printfn "Valid End Token: %b"
// Data Structuring
// Empty map used as the starting point for every construction below.
let ds_map = Map.empty
let ds_text = [| "This"; "is"; "a"; "test"; "string." |]

// Bisects ds_text and passes the two halves of the resulting pair,
// in order, to UpdateMap along with the empty map.
let ds_map2 = BisectWords ds_text ||> UpdateMap ds_map

// Passes the empty map and the whole of ds_text to ConstructMap.
let ds_map3 = ds_text |> ConstructMap ds_map
// Four token arrays; the first two are identical, the last two differ by one word.
let ds_text2 =
    [ [| "This"; "is"; "the"; "first"; "test"; "string." |]
      [| "This"; "is"; "the"; "first"; "test"; "string." |]
      [| "this"; "is"; "the"; "second"; "test"; "string" |]
      [| "this"; "is"; "the"; "third"; "test"; "string" |] ]

// Raw text containing \r, quotes, "\n\nIn" and \n, passed through ApplyStandardSetup.
let ds_text3 = ApplyStandardSetup "This is\r a test \"string\" for\n\nIn ds_text 3\n"

// ConstructMap applied to the second array only.
let ds_map4 = ds_text2.[1] |> ConstructMap ds_map

// ConstructMap applied to each array in turn, always starting from the empty map.
let ds_map5 = ds_text2 |> List.map (ConstructMap ds_map)
// Flattens ds_map5: every entry of every element, collected into one list in order.
let ds_map6 =
    ds_map5
    |> List.collect (fun built -> [ for entry in built -> entry ])

// SeperateStartWords applied to each element of ds_map5.
let ds_map7 = ds_map5 |> List.map SeperateStartWords

// First result from ds_map7.
let ds_map7Item = ds_map7.[0]
// Data Services
// Loads a randomly selected sample file and pushes it through
// ApplyStandardSetup, SortIntoGroups (with 2) and GenerateMap.
let dss_text =
    SelectRandomSampleFile ()
    |> LoadFile
    |> ApplyStandardSetup
    |> SortIntoGroups 2
    |> GenerateMap

let dss_startwords = dss_text |> SeperateStartWords

// Generates text from dss_text, passing 2 as the first argument, and prints it.
let dss_sentences = dss_text |> GenerateMarkovText 2
dss_sentences |> printfn "Text: %A"
// Fixed five-sentence sample used to try out RemoveArtefactSentences.
let dss_sentences2 = "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"

// Change the no. of sentences (the first argument) to test.
let dss_clean = RemoveArtefactSentences 6 dss_sentences2

dss_clean |> printfn "CLEANED TEXT: %s"
// Product Services (Pancake)
// Builds the async request for the template files, then runs it to completion.
let ps_files = Pancake.RequestAllTemplateFilesAsync ()
Async.RunSynchronously ps_files
(* Basic Text Generation
======================================================================================================================
These functions produce text which does not go through the "cleaning" process.
As a result, they can produce an extra sentence on the (very) odd occasion.
As a general rule, it tends to happen when the gibberish-level is set to one of the higher values.
(High gibberish-level values generate "more" coherent sentences.)
It is caused when the start-word is itself a sentence, and the text generation loop has not started.
For more information, see the GenerateMarkovText function in DataServices.fs.
The reason you would use these functions over the "Clean Text Generation" functions below is that these finish faster.
If you can tolerate the occasional extra bit of text and the functions below are too slow, use these functions.
*)
// Async requests: one fully random, one with explicit numeric settings,
// and one per template file using the same settings (3 and 10).
let ps_result1 = Pancake.RequestRandomTextAsync ()
let ps_result2 = Pancake.RequestTextAsync 5 10
let ps_result3 = Pancake.RequestTextFromFileAsync 3 10 ConsoleWaterworks
let ps_result4 = Pancake.RequestTextFromFileAsync 3 10 DesktopClock
let ps_result5 = Pancake.RequestTextFromFileAsync 3 10 WordGenerator
let ps_result6 = Pancake.RequestTextFromFileAsync 3 10 TestPost

// Run each request to completion, one after another.
Async.RunSynchronously ps_result1
Async.RunSynchronously ps_result2
Async.RunSynchronously ps_result3
Async.RunSynchronously ps_result4
Async.RunSynchronously ps_result5
Async.RunSynchronously ps_result6
(* CleanResultAsync is made public so users can clean the text manually.
For the most part, it should not need to be called directly. *)
let ps_cleanText =
    let rawText =
        // To clean generated text instead, swap the literal below for one of the
        // results above (ps_result1-6) piped into Async.RunSynchronously, e.g.
        // ps_result6 |> Async.RunSynchronously
        "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"
    // This value must not go above the one declared above (ps_resultX)
    Pancake.CleanResultAsync 6 rawText
    |> Async.RunSynchronously

ps_cleanText |> printfn "CLEANED TEXT RESULT: %s"
(* Clean Text Generation
======================================================================================================================
These functions produce text which has gone through the "cleaning" process.
This means the results go through an extra step which removes any erroneous sentences.
This is unlike the "Basic Text Generation" functions above.
The trade-off here is these "cleaning" functions can be slower to finish executing.
If you need your results to match the number of sentences you requested exactly, you should use these functions.
This also applies if you can tolerate the extra processing time.
*)
// Async requests: one with explicit numeric settings, then one per template file (3 and 10).
let ps_cleanResult1 = Pancake.RequestCleanTextAsync 5 10
let ps_cleanResult2 = Pancake.RequestCleanTextFromFileAsync 3 10 ConsoleWaterworks
let ps_cleanResult3 = Pancake.RequestCleanTextFromFileAsync 3 10 DesktopClock
let ps_cleanResult4 = Pancake.RequestCleanTextFromFileAsync 3 10 WordGenerator
let ps_cleanResult5 = Pancake.RequestCleanTextFromFileAsync 3 10 TestPost

// Run each request to completion, one after another.
Async.RunSynchronously ps_cleanResult1
Async.RunSynchronously ps_cleanResult2
Async.RunSynchronously ps_cleanResult3
Async.RunSynchronously ps_cleanResult4
Async.RunSynchronously ps_cleanResult5

let ps_cleanText2 =
    // Change the number for printing result below (E.G. ps_cleanResult1, ps_cleanResult4...)
    Async.RunSynchronously ps_cleanResult1

ps_cleanText2 |> printfn "CLEAN TEXT RESULT: %s"