|
|
|
// Learn more about F# at http://fsharp.org
|
|
|
|
// See the 'F# Tutorial' project for more help.
|
|
|
|
|
|
|
|
#load "SystemServices.fs"
|
|
|
|
#load "DataAccess.fs"
|
|
|
|
#load "DataCleaning.fs"
|
|
|
|
#load "DataProcessing.fs"
|
|
|
|
#load "DataStructuring.fs"
|
|
|
|
#load "DataServices.fs"
|
|
|
|
#load "ProductServices.fs"
|
|
|
|
|
|
|
|
open SystemServices
|
|
|
|
open DataAccess
|
|
|
|
open DataCleaning
|
|
|
|
open DataProcessing
|
|
|
|
open DataStructuring
|
|
|
|
open DataServices
|
|
|
|
open WetPancake
|
|
|
|
open System
|
|
|
|
open System.IO
|
|
|
|
|
|
|
|
// Template Files
|
|
|
|
/// Compile-time path to the "desktop-clock-info" template, rooted at this script's directory.
let [<Literal>] DesktopClock = __SOURCE_DIRECTORY__ + @"\TextFiles\desktop-clock-info.txt"

/// Compile-time path to the "console-waterworks-announcement" template, rooted at this script's directory.
let [<Literal>] ConsoleWaterworks = __SOURCE_DIRECTORY__ + @"\TextFiles\console-waterworks-announcement.txt"

/// Compile-time path to the "word-generator" template, rooted at this script's directory.
let [<Literal>] WordGenerator = __SOURCE_DIRECTORY__ + @"\TextFiles\word-generator.txt"

/// Compile-time path to the "test-post" template, rooted at this script's directory.
let [<Literal>] TestPost = __SOURCE_DIRECTORY__ + @"\TextFiles\test-post.txt"
|
|
|
|
|
|
|
|
// System Services
|
|
|
|
/// Result of calling PickRandomNumber with 10 (presumably an upper bound -- confirm in SystemServices.fs).
let ss_number = 10 |> PickRandomNumber
|
|
|
|
/// Result of handing the integer sequence 1..10 to PickRandomItem.
let ss_item =
    seq { 1 .. 10 } |> PickRandomItem
|
|
|
|
/// Exercises PickRandomItem on a one-element sequence, printing the sequence length first.
let ss_items2 =
    // A single "." -- the smallest non-empty input.
    let singleDot = Seq.init 1 (fun _ -> ".")
    singleDot |> Seq.length |> printfn "ss_items2 length is %i"
    singleDot |> PickRandomItem
|
|
|
|
|
|
|
|
// Data Access
|
|
|
|
/// Whatever CreateSampleTextFilesPath evaluates to when referenced without arguments.
let da_sampleFilePath = CreateSampleTextFilesPath

/// Result of passing da_sampleFilePath to FindSampleTextFiles.
let da_sampleTextFiles = da_sampleFilePath |> FindSampleTextFiles

/// Whatever ListSampleFiles evaluates to when referenced without arguments.
let da_sampleFiles = ListSampleFiles

/// Result of one call to SelectRandomSampleFile.
let da_randomSampleFile = SelectRandomSampleFile ()

/// Result of passing the file selected above to LoadFile.
let da_file = da_randomSampleFile |> LoadFile
|
|
|
|
|
|
|
|
// Data Cleaning
|
|
|
|
/// ReplaceArtifact applied with the double-quote pattern.
let dc_replace1 = "dc_repl1 \" end." |> ReplaceArtifact "\""

/// ReplaceArtifact applied with the "\n\nIn" pattern.
let dc_replace2 = "dc_repl2 \n\nIn end." |> ReplaceArtifact "\n\nIn"

/// ReplaceArtifact applied with the carriage-return pattern.
let dc_replace3 = "dc_repl3 \r end." |> ReplaceArtifact "\r"

/// ReplaceArtifact applied with the line-feed pattern.
let dc_replace4 = "dc_repl4 \n end." |> ReplaceArtifact "\n"
|
|
|
|
/// Runs ReplaceArtifact four times over one sample string, feeding each result into the next call.
let dc_replace5 =
    let sampleText = "a1 \" a2 \n\nIn a3 \r a4 \n end."
    // Patterns are applied left to right, so "\n\nIn" is handled before the bare "\n".
    let patterns = [ "\""; "\n\nIn"; "\r"; "\n" ]
    List.fold (fun text pattern -> ReplaceArtifact pattern text) sampleText patterns
|
|
|
|
/// SplitText applied to a short sentence with the pattern @"\s+".
let dc_split = "This is a test string." |> SplitText @"\s+"
|
|
|
|
|
|
|
|
// Data Processing
|
|
|
|
/// MatchText with the leading-capital pattern on text that starts with a capital letter.
let dp_isStart = "This is a test" |> MatchText @"^[A-Z]"

/// MatchText with the full-stop pattern on text that contains a full stop.
let dp_isEnd = "and the end is nigh." |> MatchText @"\."

/// MatchText with the leading-capital pattern on text that starts in lower case.
let dp_failStart = "somewhere in the middle" |> MatchText @"^[A-Z]"

/// MatchText with the full-stop pattern on text that contains no full stop.
let dp_failEnd = "this is not the end" |> MatchText @"\."
|
|
|
|
/// ConcatToString applied to a five-element list of words (the last element is a lone ".").
let dp_words =
    [ "This"; "is"; "a"; "test"; "." ] |> ConcatToString
|
|
|
|
/// SortIntoGroups called with 2 on a two-sentence string; the result is materialised as a list.
let dp_words2 =
    "This is a test. And has serveral words in it."
    |> SortIntoGroups 2
    |> List.ofSeq
|
|
|
|
/// BisectWords applied to an eight-element word array.
let dp_bisect =
    [| "This"; "is"; "a"; "test"; "."; "Contains"; "text"; "." |] |> BisectWords
|
|
|
|
/// CombineWords with two ordinary, non-empty strings.
let dp_combine = CombineWords "This is the previous" "this is the next"

/// CombineWords where the first argument is a single space.
let dp_combine2 = CombineWords " " "Prev is whitespace"

/// CombineWords where the second argument is the empty string.
let dp_combine3 = CombineWords "Next is empty" ""

/// CombineWords where the second argument is null.
let dp_combine4 = CombineWords "Next is null" null
|
|
|
|
/// Calls GibberishLevelIsValid and reports the outcome as text: the message of any
/// ArgumentException raised, otherwise "No exception thrown". Other exception types propagate.
let dp_gibberishException =
    let gibberishLevel = 0 // enter a number here
    try
        ignore (GibberishLevelIsValid gibberishLevel)
        "No exception thrown"
    with
    | :? ArgumentException as argError -> argError.Message
|
|
|
|
/// Calls SentencesIsValid and reports the outcome as text: the message of any
/// ArgumentException raised, otherwise "No exception thrown". Other exception types propagate.
let dp_sentencesException =
    let sentenceCount = 0 // enter a number here
    try
        ignore (SentencesIsValid sentenceCount)
        "No exception thrown"
    with
    | :? ArgumentException as argError -> argError.Message
|
|
|
|
/// Calls FilePathIsValid and reports the outcome as text: the message of an ArgumentException
/// or FileNotFoundException if one is raised, otherwise "No exception thrown".
let dp_filePathIsValidException =
    // Swap-in candidates for the path under test; unused until substituted below.
    let invalidPath = "C:/notvalid/test.txt"
    let invalidFileType = "C:/notvalid/test.doc"
    // The template file paths are declared next to the open & load declarations.
    let pathUnderTest = ConsoleWaterworks // enter filepath here
    try
        ignore (FilePathIsValid pathUnderTest)
        "No exception thrown"
    with
    | :? ArgumentException as argError -> argError.Message
    | :? FileNotFoundException as missingFile -> missingFile.Message
|
|
|
|
|
|
|
|
/// Sample text handed to the end-token check.
let dp_sentences = "This is a test setence." // Edit this to test (valid end token)

/// Evaluates TextContainsValidEndToken against dp_sentences each time it is invoked.
let dp_validEndToken () = TextContainsValidEndToken dp_sentences

dp_validEndToken () |> printfn "Valid End Token: %b"
|
|
|
|
|
|
|
|
// Data Structuring
|
|
|
|
/// Empty map used as the starting state for the map-building calls in this section.
let ds_map = Map.empty

/// Five-word sample sentence, already split into an array of words.
let ds_text = [| "This"; "is"; "a"; "test"; "string." |]

/// Bisects ds_text and passes the two parts of the resulting pair, in order, to UpdateMap along with ds_map.
let ds_map2 =
    let firstPart, secondPart = BisectWords ds_text
    UpdateMap ds_map firstPart secondPart

/// ConstructMap applied to ds_map and the whole of ds_text.
let ds_map3 = ds_text |> ConstructMap ds_map
|
|
|
|
/// Four six-word sample sentences, each stored as its own word array.
/// The first two are identical; only those two start with a capital and end with a full stop.
let ds_text2 =
    // Every sentence has the form "<opener> is the <ordinal> test <closer>".
    let sentence opener ordinal closer = [| opener; "is"; "the"; ordinal; "test"; closer |]
    [ sentence "This" "first" "string."
      sentence "This" "first" "string."
      sentence "this" "second" "string"
      sentence "this" "third" "string" ]
|
|
|
|
/// ApplyStandardSetup applied to a string containing "\r", escaped double quotes, "\n\nIn" and a trailing "\n".
let ds_text3 =
    ApplyStandardSetup "This is\r a test \"string\" for\n\nIn ds_text 3\n"
|
|
|
|
/// ConstructMap applied to ds_map and the second sentence (index 1) of ds_text2.
let ds_map4 = ConstructMap ds_map (List.item 1 ds_text2)

/// One ConstructMap result per sentence in ds_text2, each built from ds_map.
let ds_map5 = ds_text2 |> List.map (fun words -> ConstructMap ds_map words)
|
|
|
|
/// Flattens ds_map5 into one list holding every element of every item, in order.
let ds_map6 =
    ds_map5 |> List.collect (fun builtMap -> Seq.toList builtMap)
|
|
|
|
/// SeperateStartWords applied to each item of ds_map5.
let ds_map7 =
    ds_map5 |> List.map (fun builtMap -> SeperateStartWords builtMap)

/// The first element (index 0) of ds_map7.
let ds_map7Item = ds_map7.[0]
|
|
|
|
|
|
|
|
// Data Services
|
|
|
|
/// Loads a randomly selected sample file and passes it through ApplyStandardSetup,
/// SortIntoGroups (with 2) and GenerateMap, in that order.
let dss_text =
    let rawText = SelectRandomSampleFile () |> LoadFile
    let preparedText = ApplyStandardSetup rawText
    let groupedText = SortIntoGroups 2 preparedText
    GenerateMap groupedText
|
|
|
|
/// SeperateStartWords applied to dss_text.
let dss_startwords = dss_text |> SeperateStartWords

/// GenerateMarkovText called with 2 and dss_text.
let dss_sentences = dss_text |> GenerateMarkovText 2

dss_sentences |> printfn "Text: %A"
|
|
|
|
/// Fixed five-sentence input for the artefact-removal check below.
let dss_sentences2 = "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"

/// RemoveArtefactSentences applied to dss_sentences2.
let dss_clean =
    RemoveArtefactSentences 6 dss_sentences2 // Change the no. of sentences to test.

dss_clean |> printfn "CLEANED TEXT: %s"
|
|
|
|
|
|
|
|
|
|
|
|
// Product Services (Pancake)
|
|
|
|
/// Async computation returned by Pancake.RequestAllTemplateFilesAsync.
let ps_files = Pancake.RequestAllTemplateFilesAsync ()

// Runs the computation and leaves its result as the value of this top-level expression.
Async.RunSynchronously ps_files
|
|
|
|
|
|
|
|
(* Basic Text Generation
|
|
|
|
======================================================================================================================
|
|
|
|
These functions produce text which does not go through the "cleaning" process.
|
|
|
|
This means the results here are prone to producing an extra sentence on the (very) odd occasion.
|
|
|
|
As a general rule, it tends to happen when the gibberish-level is set to one of the higher values.
|
|
|
|
(Higher gibberish-level values generate "more" coherent sentences.)
|
|
|
|
It is caused when the start-word is itself a sentence, and the text generation loop has not started.
|
|
|
|
For more information, see the GenerateMarkovText function in DataServices.fs.
|
|
|
|
The reason you would use these functions over the "Clean Text Generation" functions below is that these are faster to finish.
|
|
|
|
If you can tolerate the occasional extra bit of text and the functions below are too slow, use these functions.
|
|
|
|
*)
|
|
|
|
/// Async request built by Pancake.RequestRandomTextAsync.
let ps_result1 = Pancake.RequestRandomTextAsync ()

/// Async request built by Pancake.RequestTextAsync with 5 and 10.
let ps_result2 = Pancake.RequestTextAsync 5 10

/// Async request built by Pancake.RequestTextFromFileAsync with 3, 10 and the ConsoleWaterworks template.
let ps_result3 = ConsoleWaterworks |> Pancake.RequestTextFromFileAsync 3 10

/// Async request built by Pancake.RequestTextFromFileAsync with 3, 10 and the DesktopClock template.
let ps_result4 = DesktopClock |> Pancake.RequestTextFromFileAsync 3 10

/// Async request built by Pancake.RequestTextFromFileAsync with 3, 10 and the WordGenerator template.
let ps_result5 = WordGenerator |> Pancake.RequestTextFromFileAsync 3 10

/// Async request built by Pancake.RequestTextFromFileAsync with 3, 10 and the TestPost template.
let ps_result6 = TestPost |> Pancake.RequestTextFromFileAsync 3 10

// Run each request in turn; each result is the value of its own top-level expression.
Async.RunSynchronously ps_result1
Async.RunSynchronously ps_result2
Async.RunSynchronously ps_result3
Async.RunSynchronously ps_result4
Async.RunSynchronously ps_result5
Async.RunSynchronously ps_result6
|
|
|
|
|
|
|
|
(* This function is made public so users can clean the text manually.
|
|
|
|
For the most part, this function should not be called.*)
|
|
|
|
/// Runs Pancake.CleanResultAsync (with 6) over a fixed five-sentence string and waits for the result.
let ps_cleanText =
    // To clean generated output instead, replace the literal below with one of ps_result1-6:
    //     ps_result6 |> Async.RunSynchronously
    let rawText = "This is a sentence. And, so is this. This shouldn't be here! How about this? No!"
    // This value must not go above the one declared above (ps_resultX)
    let cleaning = Pancake.CleanResultAsync 6 rawText
    Async.RunSynchronously cleaning

ps_cleanText |> printfn "CLEANED TEXT RESULT: %s"
|
|
|
|
|
|
|
|
(* Clean Text Generation
|
|
|
|
======================================================================================================================
|
|
|
|
These functions produce text which has gone through the "cleaning" process.
|
|
|
|
This means the results will go through an extra step and remove any erroneous sentences.
|
|
|
|
This is unlike the "Basic Text Generation" functions above.
|
|
|
|
The trade-off here is these "cleaning" functions can be slower to finish executing.
|
|
|
|
If you need your results to match the number of sentences you requested exactly, you should use these functions.
|
|
|
|
This, also, applies if you can tolerate the extra processing time.
|
|
|
|
*)
|
|
|
|
/// Async request built by Pancake.RequestCleanTextAsync with 5 and 10.
let ps_cleanResult1 = Pancake.RequestCleanTextAsync 5 10

/// Async request built by Pancake.RequestCleanTextFromFileAsync with 3, 10 and the ConsoleWaterworks template.
let ps_cleanResult2 = ConsoleWaterworks |> Pancake.RequestCleanTextFromFileAsync 3 10

/// Async request built by Pancake.RequestCleanTextFromFileAsync with 3, 10 and the DesktopClock template.
let ps_cleanResult3 = DesktopClock |> Pancake.RequestCleanTextFromFileAsync 3 10

/// Async request built by Pancake.RequestCleanTextFromFileAsync with 3, 10 and the WordGenerator template.
let ps_cleanResult4 = WordGenerator |> Pancake.RequestCleanTextFromFileAsync 3 10

/// Async request built by Pancake.RequestCleanTextFromFileAsync with 3, 10 and the TestPost template.
let ps_cleanResult5 = TestPost |> Pancake.RequestCleanTextFromFileAsync 3 10

// Run each request in turn; each result is the value of its own top-level expression.
Async.RunSynchronously ps_cleanResult1
Async.RunSynchronously ps_cleanResult2
Async.RunSynchronously ps_cleanResult3
Async.RunSynchronously ps_cleanResult4
Async.RunSynchronously ps_cleanResult5
|
|
|
|
|
|
|
|
/// Result of running one of the clean-text requests to completion.
let ps_cleanText2 =
    // Change the number for printing result below (E.G. ps_cleanResult1, ps_cleanResult4...)
    Async.RunSynchronously ps_cleanResult1

ps_cleanText2 |> printfn "CLEAN TEXT RESULT: %s"
|