|
|
|
@ -21,8 +21,6 @@ func main() {
|
|
|
|
|
|
|
|
|
|
text_slice := SplitBySpace(string(input_text)) |
|
|
|
|
|
|
|
|
|
// fmt.Println(text_slice)
|
|
|
|
|
|
|
|
|
|
text_slice = FindHexBinArticles(text_slice) // convert nr systems
|
|
|
|
|
|
|
|
|
|
text := RemoveDoubleSpaces(strings.Join(text_slice, " ")) // empty slice units removed
|
|
|
|
@ -79,85 +77,47 @@ func FixPunctuation(text string) string {
|
|
|
|
|
// FixApostrophes tidies the spacing around paired typographic quote marks
// (‘ and ’) in text and returns the corrected string.
//
// Marks are handled left to right in a single pass:
//
//   - A mark that has a non-space rune on both sides (as in "don’t") is
//     treated as an apostrophe: it does not take part in pairing, and it is
//     always written out as ’ (so "don‘t" becomes "don’t").
//   - Every other mark alternates between opening and closing a quoted span.
//     An opening mark that is directly followed by a space, with at least one
//     more rune after that space, is written as ‘ and the space is dropped.
//     A closing mark that is directly preceded by a space (and sits at rune
//     index 2 or later) is written as ’ and that space is dropped.
//   - A pairing mark with no adjacent space to remove is copied unchanged.
//
// Only the two typographic marks are recognised; the ASCII apostrophe (')
// passes through untouched. An empty input yields an empty string.
func FixApostrophes(text string) string {
	const (
		openMark  = '‘'
		closeMark = '’'
	)

	runes := []rune(text)

	// The result is built in a separate slice. Deleting from runes while
	// ranging over it would desynchronise the loop index from the data,
	// because the range bound is fixed when the loop starts.
	out := make([]rune, 0, len(runes))

	opening := true   // true when the next pairing mark opens a quoted span
	skipNext := false // true when the next rune is a space that must be dropped

	for i, r := range runes {
		if skipNext {
			// This is the space that followed an opening mark.
			skipNext = false
			continue
		}

		if r != openMark && r != closeMark {
			out = append(out, r)
			continue
		}

		// A mark glued to non-space runes on both sides is an apostrophe
		// inside a word; normalise it and leave the pairing state alone.
		if i > 0 && i < len(runes)-1 && runes[i-1] != ' ' && runes[i+1] != ' ' {
			out = append(out, closeMark)
			continue
		}

		if opening {
			if i < len(runes)-2 && runes[i+1] == ' ' {
				r = openMark
				skipNext = true
			}
		} else if i > 1 && len(out) > 0 && out[len(out)-1] == ' ' {
			// The last emitted rune is the space right before this mark.
			// Checking out (not runes) avoids removing a second rune when
			// that space was already dropped after the opening mark.
			out = out[:len(out)-1]
			r = closeMark
		}

		out = append(out, r)
		opening = !opening
	}

	return string(out)
}
|
|
|
|
|
|
|
|
|
func FindHexBinArticles(slice []string) []string { |
|
|
|
|