Coverage for adhoc-cicd-odoo-odoo / odoo / tools / arabic_reshaper / __init__.py: 9%
58 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 18:05 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-09 18:05 +0000
1# The following code was copied from the original author's repository
2# at https://github.com/mpcabd/python-arabic-reshaper/tree/v3.0.0/arabic_reshaper
3# Version: 3.0.0
4# This work is licensed under the MIT License.
5# To view a copy of this license, visit https://opensource.org/licenses/MIT
6# Written by Abdullah Diab (mpcabd)
7# Email: mpcabd@gmail.com
8# Website: http://mpcabd.xyz
9#
10# This code was simplified by removing configuration (keeping only the default
11# configuration) then constant-folding all the configuration items by hand.
13import re
15from itertools import repeat
17from .letters import (UNSHAPED, ISOLATED, TATWEEL, ZWJ, LETTERS_ARABIC, FINAL,
18 INITIAL, MEDIAL, connects_with_letters_before_and_after,
19 connects_with_letter_before, connects_with_letter_after)
21__all__ = ['reshape']
# Character class matching one haraka code point. ``reshape`` tests each
# character of its input against this pattern and leaves the matching ones
# out of its result.
#
# NOTE(review): ``\u08d4-\u08e1`` is entirely contained in ``\u08d4-\u08ed``,
# and the latter also covers U+08E2, which the ``\u08d4-\u08e1`` /
# ``\u08e3-\u08ff`` pair skips. Kept as-is to preserve behaviour; confirm
# which of the two was intended.
HARAKAT_RE = re.compile(
    '[' + ''.join((
        '\u0610-\u061a',
        '\u064b-\u065f',
        '\u0670',
        '\u06d6-\u06dc',
        '\u06df-\u06e8',
        '\u06ea-\u06ed',
        '\u08d4-\u08e1',
        '\u08d4-\u08ed',
        '\u08e3-\u08ff',
    )) + ']',
    re.UNICODE | re.VERBOSE,
)
# Letter sequences that can be replaced by a single ligature glyph.
#
# Each alternative is wrapped in its own capturing group: ``reshape`` works
# out which ligature matched by looking for the non-empty group and uses that
# group's position as the index into ``GROUP_INDEX_TO_LIGATURE_FORMs``.
# Without the groups ``match.groups()`` is always empty and the lookup falls
# back to the last row of the table for every ligature. The order of the
# alternatives must therefore stay in sync with the rows of that table.
LIGATURES_RE = re.compile("""
    (\u0627\u0644\u0644\u0647) # ARABIC LIGATURE ALLAH
  | (\u0644\u0627) # ARABIC LIGATURE LAM WITH ALEF
  | (\u0644\u0623) # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE
  | (\u0644\u0625) # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW
  | (\u0644\u0622) # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE
""", re.UNICODE | re.VERBOSE)
# Glyphs of each ligature, one row per alternative of ``LIGATURES_RE`` and in
# the same order. ``reshape`` indexes a row with one of the ISOLATED /
# INITIAL / MEDIAL / FINAL constants; an empty string means there is no glyph
# for that form, in which case ``reshape`` leaves the letters as they are.
# Only the first and the last slot of a row are ever filled here.
GROUP_INDEX_TO_LIGATURE_FORMs = [
    (isolated_glyph, '', '', final_glyph)
    for isolated_glyph, final_glyph in (
        (
            '\N{ARABIC LIGATURE ALLAH ISOLATED FORM}',
            '',
        ),
        (
            '\N{ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM}',
            '\N{ARABIC LIGATURE LAM WITH ALEF FINAL FORM}',
        ),
        (
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM}',
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM}',
        ),
        (
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM}',
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM}',
        ),
        (
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM}',
            '\N{ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM}',
        ),
    )
]
def reshape(text):
    """Return *text* with its Arabic letters replaced by contextual forms.

    The text is processed in three passes:

    1. every character is given a form depending on whether it connects to
       its neighbours; characters matched by ``HARAKAT_RE`` are dropped,
       characters missing from ``LETTERS_ARABIC`` are kept untouched, and
       ZWJ only influences the form of its neighbours before being removed;
    2. letter sequences matched by ``LIGATURES_RE`` are replaced by the
       ligature glyph found in ``GROUP_INDEX_TO_LIGATURE_FORMs``, when one
       exists for the required form;
    3. the remaining letters are replaced by the glyph that
       ``LETTERS_ARABIC`` gives for their form.

    :param text: the string to reshape
    :return: the reshaped string, ``''`` when *text* is empty or ``None``
    """
    if not text:
        return ''

    # ``output`` holds one ``(letter, form)`` pair per character kept.
    LETTER = 0
    FORM = 1
    # Marks entries that must be emitted verbatim.
    NOT_SUPPORTED = -1

    output = []

    for letter in text:
        if HARAKAT_RE.match(letter):
            # Harakat are not part of the output.
            pass
        elif letter not in LETTERS_ARABIC:
            output.append((letter, NOT_SUPPORTED))
        elif not output:  # first letter
            output.append((letter, ISOLATED))
        else:
            previous_letter = output[-1]
            previous_char, previous_form = previous_letter[LETTER], previous_letter[FORM]
            cannot_join = (
                previous_form == NOT_SUPPORTED
                or not connects_with_letter_before(letter, LETTERS_ARABIC)
                or not connects_with_letter_after(previous_char, LETTERS_ARABIC)
                or (
                    previous_form == FINAL
                    and not connects_with_letters_before_and_after(previous_char, LETTERS_ARABIC)
                )
            )
            if cannot_join:
                output.append((letter, ISOLATED))
            else:
                # The previous letter now connects to the current one: an
                # isolated letter starts a run, anything else sits inside it.
                joined_form = INITIAL if previous_form == ISOLATED else MEDIAL
                output[-1] = (previous_char, joined_form)
                output.append((letter, FINAL))

        # Remove ZWJ if it's the second to last item as it won't be useful
        if len(output) > 1 and output[-2][LETTER] == ZWJ:
            output.pop(len(output) - 2)

    if output and output[-1][LETTER] == ZWJ:
        output.pop()

    # Look for ligatures in the letters actually kept in ``output`` rather
    # than in the input text. Both omit the harakat, but the ZWJ entries were
    # removed from ``output`` above: searching the input would yield offsets
    # that no longer line up with ``output`` (wrong entries replaced, or an
    # IndexError) as soon as the text contains a ZWJ. Every entry holds
    # exactly one character at this point, so offsets in ``letters`` are
    # indexes in ``output``.
    letters = ''.join(entry[LETTER] for entry in output)

    for match in LIGATURES_RE.finditer(letters):
        # Index of the capturing group that matched; -1 (the last row of the
        # table) when no group captured anything.
        group_index = next((
            i for i, group in enumerate(match.groups()) if group
        ), -1)
        forms = GROUP_INDEX_TO_LIGATURE_FORMs[group_index]
        a, b = match.span()
        a_form = output[a][FORM]
        b_form = output[b - 1][FORM]

        # +-----------+----------+---------+---------+----------+
        # | a   \   b | ISOLATED | INITIAL | MEDIAL  | FINAL    |
        # +-----------+----------+---------+---------+----------+
        # | ISOLATED  | ISOLATED | INITIAL | INITIAL | ISOLATED |
        # | INITIAL   | ISOLATED | INITIAL | INITIAL | ISOLATED |
        # | MEDIAL    | FINAL    | MEDIAL  | MEDIAL  | FINAL    |
        # | FINAL     | FINAL    | MEDIAL  | MEDIAL  | FINAL    |
        # +-----------+----------+---------+---------+----------+
        joined_before = a_form not in (ISOLATED, INITIAL)
        joined_after = b_form not in (ISOLATED, FINAL)
        if joined_before:
            ligature_form = MEDIAL if joined_after else FINAL
        else:
            ligature_form = INITIAL if joined_after else ISOLATED

        if not forms[ligature_form]:
            # No glyph for this form: keep the individual letters.
            continue

        # The glyph takes the first slot and the other slots are blanked, so
        # that ``output`` keeps its length and later offsets stay valid.
        output[a] = (forms[ligature_form], NOT_SUPPORTED)
        output[a + 1:b] = repeat(('', NOT_SUPPORTED), b - 1 - a)

    result = []
    for entry in output:
        char, form = entry[LETTER], entry[FORM]
        if not char:
            # Slot blanked by a ligature.
            continue
        if form == NOT_SUPPORTED or form == UNSHAPED:
            result.append(char)
        else:
            result.append(LETTERS_ARABIC[char][form])

    return ''.join(result)