Craig Oates
6 years ago
2 changed files with 45 additions and 2 deletions
@ -1,3 +1,46 @@
|
||||
module internal TextProcessing |
||||
|
||||
open FileProcessing |
||||
open System.Text.RegularExpressions |
||||
open System |
||||
|
||||
/// Splits `text` into words, using any run of whitespace as the separator.
///
/// The previous pattern `\@s+` matched a literal '@' followed by one or more
/// 's' characters, so ordinary text was never split on whitespace.
/// Note: `Regex.Split` yields an empty string as the first/last element when
/// `text` starts/ends with whitespace; callers that care should trim first.
let SplitIntoWords text = Regex.Split(text, @"\s+")
||||
|
||||
/// Concatenates `words` into one string, placing a single space between
/// consecutive elements.
let JoinWords words = words |> String.concat " "
||||
|
||||
/// Returns a function that splits its text argument with `SplitIntoWords`
/// and yields every sliding window of `pairSize` consecutive words.
let GetWordPairs pairSize =
    fun text -> text |> SplitIntoWords |> Seq.windowed pairSize
||||
|
||||
/// Splits a word array into (all words but the last joined by spaces, last word).
///
/// The array is expected to be non-empty; an empty array fails when the
/// leading words are taken, before the last element is read.
let BisectWords (words:_[]) =
    let lastIndex = words.Length - 1
    // Build the joined prefix first so evaluation order matches the tuple's
    // left-to-right construction.
    let leading = Seq.take lastIndex words |> JoinWords
    (leading, words.[lastIndex])
||||
|
||||
/// Returns a copy of `map` in which `value` has been prepended to the list
/// stored under `key`. When `key` is absent, it is bound to `[value]`.
///
/// Uses a single lookup; `Map.add` already replaces an existing binding, so
/// no explicit removal is needed beforehand.
let UpdateMap (map:Map<_,_>) key value =
    match Map.tryFind key map with
    | Some existingValues -> Map.add key (value :: existingValues) map
    | None -> Map.add key [value] map
||||
|
||||
/// Fold step: bisects `words` into (joined prefix, last word) and records the
/// last word under that prefix in `map` via `UpdateMap`.
let MapBuilder map words =
    let prefix, lastWord = BisectWords words
    UpdateMap map prefix lastWord
||||
|
||||
/// Folds `MapBuilder` over a sequence of word arrays, starting from an empty
/// map, and returns the accumulated map.
/// The explicit type parameter is not referenced by the body; it is kept so
/// existing call sites continue to compile unchanged.
let BuildMarkovMap<'a> = Map.empty |> Seq.fold MapBuilder
||||
|
||||
/// Picks one element of `seq` using the supplied index generator.
///
/// `random` is called once with the number of elements in `seq`, and the
/// value it returns is used as the zero-based index of the element to return.
/// The sequence is enumerated twice (once to count, once to index).
let GetRandomWord (random: int -> int) seq =
    let count = Seq.length seq
    Seq.item (random count) seq
||||
|
||||
/// Returns an element of `words` chosen at a random index in
/// [0, number of elements).
///
/// NOTE(review): a new `Random` is constructed on every call; on runtimes that
/// seed from the clock, calls made in quick succession may repeat — confirm
/// whether that matters for callers.
let GetWord words =
    let count = Seq.length words
    let index = Random().Next(count)
    Seq.item index words
||||
|
||||
/// Joins `prev` and `next` with a space, skipping either one if it is null,
/// empty, or consists only of whitespace.
let CombineWords prev next =
    let meaningful =
        [ for word in [prev; next] do
            if not (String.IsNullOrWhiteSpace word) then
                yield word ]
    JoinWords meaningful
||||
|
||||
/// Builds a sentence by calling `markovChain` with `startWord` and the
/// singleton list `[startWord]`, reversing the list it returns, and joining
/// the words with spaces.
/// Presumably `markovChain` accumulates words by prepending, which is why the
/// result is reversed — verify against the chain implementation.
let GetMarkovSentence startWord markovChain =
    let reversedWords = markovChain startWord [startWord]
    let orderedWords = List.rev reversedWords
    JoinWords orderedWords
Loading…
Reference in new issue