Coverage for adhoc-cicd-odoo-odoo / odoo / tools / arabic_reshaper / __init__.py: 9%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-09 18:05 +0000

1# The following code was copied from the original author's repository 

2# at https://github.com/mpcabd/python-arabic-reshaper/tree/v3.0.0/arabic_reshaper 

3# Version: 3.0.0 

4# This work is licensed under the MIT License. 

5# To view a copy of this license, visit https://opensource.org/licenses/MIT 

6# Written by Abdullah Diab (mpcabd) 

7# Email: mpcabd@gmail.com 

8# Website: http://mpcabd.xyz 

9# 

10# This code was simplified by removing configuration (keeping only the default 

11# configuration) then constant-folding all the configuration items by hand. 

12 

13import re 

14 

15from itertools import repeat 

16 

17from .letters import (UNSHAPED, ISOLATED, TATWEEL, ZWJ, LETTERS_ARABIC, FINAL, 

18 INITIAL, MEDIAL, connects_with_letters_before_and_after, 

19 connects_with_letter_before, connects_with_letter_after) 

20 

21__all__ = ['reshape'] 

22 

# Character class matching a single haraka (Arabic combining mark).
# The class is assembled from one fragment per code point / inclusive range;
# the two fragments starting at U+08D4 overlap, which is redundant but
# harmless inside a character class.
HARAKAT_RE = re.compile(
    ''.join((
        '[',
        '\u0610-\u061a',
        '\u064b-\u065f',
        '\u0670',
        '\u06d6-\u06dc',
        '\u06df-\u06e8',
        '\u06ea-\u06ed',
        '\u08d4-\u08e1',
        '\u08d4-\u08ed',
        '\u08e3-\u08ff',
        ']',
    )),
    flags=re.UNICODE | re.VERBOSE,
)

38 

39 

# Matches the letter sequences that can be rendered as a single ligature.
# Every alternative is wrapped in its own capturing group, listed in the same
# order as the rows of GROUP_INDEX_TO_LIGATURE_FORMs: reshape() inspects
# ``match.groups()`` to find which alternative matched and uses that position
# as the row index.  Without the groups ``match.groups()`` is always empty and
# every ligature would be resolved to the last row of the table.
LIGATURES_RE = re.compile("""
    (\u0627\u0644\u0644\u0647) # ARABIC LIGATURE ALLAH
  | (\u0644\u0627) # ARABIC LIGATURE LAM WITH ALEF
  | (\u0644\u0623) # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE
  | (\u0644\u0625) # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW
  | (\u0644\u0622) # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE
""", re.UNICODE | re.VERBOSE)

47 

# Ligature glyphs, one row per alternative of LIGATURES_RE (same order).
# reshape() indexes a row with the form constant it computed for the ligature;
# judging by the glyph names, slot 0 holds the isolated form and slot 3 the
# final form.  An empty string means no glyph exists for that form, in which
# case reshape() leaves the individual letters untouched.
GROUP_INDEX_TO_LIGATURE_FORMs = [
    (
        '\N{ARABIC LIGATURE ALLAH ISOLATED FORM}',
        '',
        '',
        '',
    ),
    (
        '\N{ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM}',
        '',
        '',
        '\N{ARABIC LIGATURE LAM WITH ALEF FINAL FORM}',
    ),
    (
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM}',
        '',
        '',
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM}',
    ),
    (
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM}',
        '',
        '',
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM}',
    ),
    (
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM}',
        '',
        '',
        '\N{ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM}',
    ),
]

55 

56 

def reshape(text):
    """Return *text* with Arabic letters replaced by their contextual forms.

    Each character is first given a form (isolated, initial, medial or final)
    according to whether it can join its neighbours.  Characters matched by
    ``HARAKAT_RE`` are dropped, and characters missing from ``LETTERS_ARABIC``
    are passed through unchanged.  Letter sequences matched by
    ``LIGATURES_RE`` are then collapsed into one ligature glyph whenever
    ``GROUP_INDEX_TO_LIGATURE_FORMs`` provides a glyph for the resulting form.

    :param text: the string to reshape; any falsy value yields ``''``
    :return: the reshaped string
    """
    if not text:
        return ''

    # ``output`` holds ``(letter, form)`` pairs; these are the pair indices.
    LETTER = 0
    FORM = 1
    # Form marker for entries that must be emitted verbatim.
    NOT_SUPPORTED = -1

    output = []

    for letter in text:
        if HARAKAT_RE.match(letter):
            # Harakat are not part of the reshaped output.
            pass
        elif letter not in LETTERS_ARABIC:
            output.append((letter, NOT_SUPPORTED))
        elif not output:  # first letter
            output.append((letter, ISOLATED))
        else:
            previous_letter, previous_form = output[-1]
            if (
                previous_form == NOT_SUPPORTED
                or not connects_with_letter_before(letter, LETTERS_ARABIC)
                or not connects_with_letter_after(previous_letter, LETTERS_ARABIC)
                or (
                    previous_form == FINAL
                    and not connects_with_letters_before_and_after(previous_letter, LETTERS_ARABIC)
                )
            ):
                # The two letters cannot be joined.
                output.append((letter, ISOLATED))
            elif previous_form == ISOLATED:
                # The previous letter starts a new connected run.
                output[-1] = (previous_letter, INITIAL)
                output.append((letter, FINAL))
            else:
                # Otherwise, change the previous letter so that it connects
                # to the current letter as well.
                output[-1] = (previous_letter, MEDIAL)
                output.append((letter, FINAL))

        # Remove ZWJ if it's the second to last item as it won't be useful:
        # it has already influenced the form of its neighbours.
        if len(output) > 1 and output[-2][LETTER] == ZWJ:
            del output[-2]

    if output and output[-1][LETTER] == ZWJ:
        output.pop()

    # The ligature search below uses match positions as indices into
    # ``output``.  ``output`` contains neither harakat (skipped above) nor ZWJ
    # (all removed above), so both must be stripped from the text as well;
    # otherwise the spans would point at the wrong entries, or past the end
    # of ``output``.
    text = HARAKAT_RE.sub('', text).replace(ZWJ, '')

    for match in LIGATURES_RE.finditer(text):
        # Position of the first non-empty capturing group, used as the row
        # index in the ligature table.
        # NOTE(review): this relies on LIGATURES_RE wrapping each alternative
        # in its own capturing group; when no group is reported the fallback
        # of -1 selects the last row of the table.
        group_index = next((
            i for i, group in enumerate(match.groups()) if group
        ), -1)
        forms = GROUP_INDEX_TO_LIGATURE_FORMs[group_index]
        a, b = match.span()
        a_form = output[a][FORM]
        b_form = output[b - 1][FORM]

        # Form of the ligature, from the forms of its first (a) and last (b)
        # letters:
        # +-----------+----------+---------+---------+----------+
        # | a   \   b | ISOLATED | INITIAL | MEDIAL  | FINAL    |
        # +-----------+----------+---------+---------+----------+
        # | ISOLATED  | ISOLATED | INITIAL | INITIAL | ISOLATED |
        # | INITIAL   | ISOLATED | INITIAL | INITIAL | ISOLATED |
        # | MEDIAL    | FINAL    | MEDIAL  | MEDIAL  | FINAL    |
        # | FINAL     | FINAL    | MEDIAL  | MEDIAL  | FINAL    |
        # +-----------+----------+---------+---------+----------+
        ends_run = b_form in (ISOLATED, FINAL)
        if a_form in (ISOLATED, INITIAL):
            ligature_form = ISOLATED if ends_run else INITIAL
        else:
            ligature_form = FINAL if ends_run else MEDIAL

        if not forms[ligature_form]:
            # No glyph for this form: keep the individual letters.
            continue

        # The ligature takes the first slot; the remaining slots are blanked
        # (not deleted) so later match positions remain valid.
        output[a] = (forms[ligature_form], NOT_SUPPORTED)
        output[a + 1:b] = repeat(('', NOT_SUPPORTED), b - 1 - a)

    result = []
    for letter, form in output:
        if not letter:
            # Slot blanked by a ligature replacement.
            continue
        if form in (NOT_SUPPORTED, UNSHAPED):
            result.append(letter)
        else:
            result.append(LETTERS_ARABIC[letter][form])

    return ''.join(result)