|
|
|
@ -21,10 +21,14 @@ func main() {
|
|
|
|
|
|
|
|
|
|
text_slice := SplitBySpace(string(input_text)) |
|
|
|
|
|
|
|
|
|
// fmt.Println(text_slice)
|
|
|
|
|
|
|
|
|
|
text_slice = FindHexBinArticles(text_slice) // convert nr systems
|
|
|
|
|
|
|
|
|
|
text := RemoveDoubleSpaces(strings.Join(text_slice, " ")) // empty slice units removed
|
|
|
|
|
|
|
|
|
|
// fmt.Println(text)
|
|
|
|
|
|
|
|
|
|
text = FixPunctuation(text) // first punctuation check
|
|
|
|
|
|
|
|
|
|
next_slice := SplitBySpace(text) |
|
|
|
@ -37,9 +41,9 @@ func main() {
|
|
|
|
|
|
|
|
|
|
output_text = FixApostrophes(output_text) // apostrophes being checked
|
|
|
|
|
|
|
|
|
|
os.WriteFile(string(os.Args[2]), []byte(string(output_text)), os.ModePerm) |
|
|
|
|
fmt.Println(string(output_text)) |
|
|
|
|
|
|
|
|
|
// fmt.Println(string(output_text))
|
|
|
|
|
os.WriteFile(string(os.Args[2]), []byte(string(output_text)), os.ModePerm) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func RemoveDoubleSpaces(text string) string { |
|
|
|
@ -74,34 +78,74 @@ func FixPunctuation(text string) string {
|
|
|
|
|
|
|
|
|
|
// FixApostrophes tightens the spacing inside pairs of quotation marks.
//
// The marks ', ‘ and ’ are all treated alike. Marks are paired in order of
// appearance: the first free-standing mark opens a pair and the next one
// closes it, and so on. A single space directly after an opening mark and a
// single space directly before a closing mark are removed, so
//
//	I am ' awesome '
//
// becomes
//
//	I am 'awesome'
//
// A mark that has a non-space character on both sides (as in "don't") is
// treated as an in-word apostrophe: it is copied through unchanged and does
// not take part in pairing.
//
// The text is processed rune by rune, so multi-byte marks are handled
// correctly, and the result is assembled in a separate slice instead of
// deleting from the slice that is being iterated.
func FixApostrophes(text string) string {
	runes := []rune(text)
	out := make([]rune, 0, len(runes))

	opening := true    // true while the next free-standing mark opens a pair
	skipSpace := false // true right after an opening mark: drop one following space

	for i, r := range runes {
		if skipSpace {
			skipSpace = false
			if r == ' ' {
				continue
			}
		}

		if !isApostropheMark(r) {
			out = append(out, r)
			continue
		}

		// In-word apostrophe: something other than a space on both sides.
		// The left neighbour is read from out so that spaces already
		// removed are taken into account.
		if len(out) > 0 && i < len(runes)-1 && out[len(out)-1] != ' ' && runes[i+1] != ' ' {
			out = append(out, r)
			continue
		}

		if opening {
			skipSpace = true
		} else if n := len(out); n > 0 && out[n-1] == ' ' {
			// Closing mark: pull it up against the quoted text.
			out = out[:n-1]
		}
		opening = !opening
		out = append(out, r)
	}

	return string(out)
}

// isApostropheMark reports whether r is one of the marks FixApostrophes pairs up.
func isApostropheMark(r rune) bool {
	return r == '\'' || r == '‘' || r == '’'
}
|
|
|
|
|
|
|
|
|
func FindHexBinArticles(slice []string) []string { |
|
|
|
@ -173,12 +217,12 @@ func SplitBySpace(text string) []string {
|
|
|
|
|
str_temp := "" // temporary string
|
|
|
|
|
text_slice := make([]string, 0) |
|
|
|
|
|
|
|
|
|
for i := 0; i < len(text); i++ { |
|
|
|
|
for i, char := range text { |
|
|
|
|
|
|
|
|
|
if string(text[i]) != separator { |
|
|
|
|
str_temp += string(text[i]) |
|
|
|
|
if string(char) != separator { |
|
|
|
|
str_temp += string(char) |
|
|
|
|
} else if str_temp == "(low," || str_temp == "(up," || str_temp == "(cap," { |
|
|
|
|
str_temp += string(text[i]) |
|
|
|
|
str_temp += string(char) |
|
|
|
|
continue |
|
|
|
|
} else { |
|
|
|
|
text_slice = append(text_slice, str_temp) |
|
|
|
|