/
NastaliqKerning.py
492 lines (425 loc) · 19.7 KB
/
NastaliqKerning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
"""NastaliqKerning plugin
This plugin provides two FEZ verbs - `NastaliqKerning` and
`AtHeight`. These are related in that they compute the "height"
of a glyph sequence and chain to a different routine based on
that height. In the case of `NastaliqKerning`, we evaluate all
sequences beginning with an initial glyph, compute the height
of that sequence, and also create a kerning table for that height.
In `AtHeight`, we evaluate all sequences, and if they fall within
a specified height range, dispatch to an arbitrary routine. (This
is used for the dot avoidance code.)
The syntax of each verb is:
NastaliqKerning <units> <percentage>%
Creates kern tables which bring together the initial and final
glyphs to within `units` units of one another or within the specified
percentage of the width of the final glyph.
You will want to read https://simoncozens.github.io/nastaliq-autokerning/
before trying to understand this code.
AtHeight <units1>-<units2> <routine>
Chains to the given routine for all sequences of glyphs between
`units1` and `units2` high. The context of the chained routine will
be the final/isolate glyph of the previous sequence, a space (if there
is one), and the glyph sequence. For example, in the case of the
sequence `لو پجل`, the height will be computed as 400 units in the
case of Gulzar; if 400 is between `units1` and `units2`, then the
routine will be called with `VAOf1` as the start of the chained glyph
sequence.
"""
import logging
import sys
import warnings
from glob import glob
from itertools import product
import csv
import fontFeatures
import tqdm
from fez import FEZVerb
from fontTools.feaLib.variableScalar import VariableScalar
from glyphtools import bin_glyphs_by_metric
from kerndeterminer import KernDeterminer
# Parser options and per-verb grammars for the two verbs listed in VERBS.
# NOTE(review): presumably consumed by the FEZ plugin loader - confirm.
PARSEOPTS = dict(use_helpers=True)
GRAMMAR = ""
# NastaliqKerning <units> <percentage>%
NastaliqKerning_GRAMMAR = """
?start: action
action: (variable_scalar | integer_container) integer_container "%"
"""
# AtHeight <units1>-<units2> <routine>
AtHeight_GRAMMAR = """
?start: action
action: integer_container "-" integer_container BARENAME
"""
VERBS = ["NastaliqKerning", "AtHeight"]
# Module-wide logging: bare messages, warnings and above only.
logging.basicConfig(format="%(message)s")
logger = logging.getLogger("NastaliqKerning")
logger.setLevel(logging.WARN)
# Number of different "rise" groups, used in clustering the glyphs
# to determine the sequence height. This is O(n^2) in number of
# generated layout rules, so increasing this to 4 will overflow
# the font builder! 3 seems accurate enough for practical purposes.
ACCURACY1 = 3
# Controls the number of kern tables to be generated by rounding
# the computed height to this number of units, i.e. one table at the
# baseline, one at a height of 100 units, one at 200 units, and so on,
# up to MAXIMUM_RISE.
RISE_QUANTIZATION = 100
# The maximum height we care about. All higher sequences will have
# the same kerning applied to them.
MAXIMUM_RISE = 600
# Rounding the kern values allows them to be stored more efficiently
# in the OpenType binary.
KERN_QUANTIZATION = 10
MINIMUM_KERN = -20 # If it's not less than this, don't bother
# Only consider sequences of this length - for longer sequences,
# we start counting height from the *medial* instead of the final.
MAXIMUM_WORD_LENGTH = 5
def taper_schedule(height):
    """Return the taper factor applied to cross-space kerns at ``height``.

    For cross-space kerning (when there is a space between the two
    letters being kerned), we take a different approach, looking purely
    at the horizontal ink-to-ink distance between the glyphs. At the
    baseline, we keep the ink-to-ink distance equal to the target distance,
    but as the left glyph's height increases, there is a gap underneath it,
    and so we taper the kern based on this height to avoid the gap.

    The factor is 1.0 below 200 units and drops by 0.1 for every further
    100 units, bottoming out at 0.6 from 500 units upwards.
    """
    # (exclusive upper bound, taper) pairs, checked in ascending order.
    schedule = ((200, 1.0), (300, 0.9), (400, 0.8), (500, 0.7))
    for ceiling, factor in schedule:
        if height < ceiling:
            return factor
    return 0.6
def quantize(number, degree):
    """Snap ``number`` to the closest multiple of ``degree``.

    Exact halves are resolved by Python's built-in ``round``, which rounds
    the step count half-to-even.
    """
    steps = round(number / degree)
    return degree * steps
def zero_out_nonnegative_and_quantize(varkern, degree):
    """Clean up a variable kern in place and return it.

    Every per-location value below -10 is rounded to a multiple of
    ``degree``; every other value is replaced by 0. If, after that, no
    location has a value of ``MINIMUM_KERN`` or less, the kern is emptied
    entirely so callers can test ``varkern.values`` for truthiness.
    """
    cleaned = {
        location: quantize(kern, degree) if kern < -10 else 0
        for location, kern in varkern.values.items()
    }
    worth_keeping = any(value <= MINIMUM_KERN for value in cleaned.values())
    varkern.values = cleaned if worth_keeping else {}
    return varkern
class NastaliqKerning(FEZVerb):
    """Kern a Nastaliq font.

    Implements the ``NastaliqKerning <units> <percentage>%`` verb. For each
    quantized sequence height ("rise") a kern table is generated, and
    chaining rules are emitted which dispatch every glyph-group sequence to
    the table matching its height.
    """

    def action(self, args):
        """Main entry point.

        Args:
            args: Parsed verb arguments. ``args[0]`` is the target distance
                (a plain integer or a variable scalar); ``args[1]`` is the
                percentage, stored as a fraction in ``self.maxtuck``.

        Returns:
            The list of chaining rules that dispatch to the per-rise kern
            routines.

        Side effects:
            Writes a debug table to ``/tmp/debugkern.csv`` while running;
            the file is closed again before this method returns.
        """
        # Read the parameters
        self.distance_at_closest = args[0].resolve_as_integer()
        self.variable = True
        if not isinstance(self.distance_at_closest, VariableScalar):
            # Let's make it one, located at the default master.
            self.variable = False
            new_distance = VariableScalar()
            default_location = tuple(
                self.parser.font.default_master.location.items()
            )
            new_distance.values[default_location] = self.distance_at_closest
            self.distance_at_closest = new_distance
        self.maxtuck = args[1].resolve_as_integer() / 100.0
        self.ink_to_ink_routines = {}
        self.kerner = KernDeterminer(glob("sources/build/*.glyphs")[0])

        # Read a few useful classes into Python variables.
        self.inits = self.parser.fontfeatures.namedClasses["inits"]
        medis = self.parser.fontfeatures.namedClasses["medis"]
        bariye = self.parser.fontfeatures.namedClasses["bariye"]
        self.isols = [
            x for x in self.parser.fontfeatures.namedClasses["isols"] if x not in bariye
        ]
        finas = [
            x for x in self.parser.fontfeatures.namedClasses["finas"] if x not in bariye
        ]
        # Sorted so that the generated rules (and the debug CSV) come out in
        # the same order on every run, independent of string hashing.
        self.isols_finas = sorted(set(self.isols + finas) | set(bariye))

        # These glyphs are special cased. We should probably read
        # `blockers` from a glyph class, really, instead of hard
        # coding it.
        blockers = ["AINf1", "JIMf1"]

        # Now we cluster the medials and finals based on their
        # rise.
        binned_medis = bin_glyphs_by_metric(
            self.parser.font, medis, "rise", bincount=ACCURACY1
        )
        binned_finas = bin_glyphs_by_metric(
            self.parser.font, finas, "rise", bincount=ACCURACY1
        )

        # This will hold kern tables for each rise value.
        self.kern_at_rise = {}

        # The main entry to our kerning routine. We ignore marks
        # and ligatures (spaces)
        routine = fontFeatures.Routine(name="NastaliqKerning")
        routine.flags = 0x04 | 0x08

        # The debug file is only needed while tables are being generated;
        # scope it so the handle is always flushed and closed.
        with open("/tmp/debugkern.csv", "w", newline="") as debug_file:
            self.debug_csv = csv.writer(debug_file)
            try:
                self._append_sequence_rules(
                    routine, binned_medis, binned_finas, blockers
                )
            finally:
                # Never leave a writer around that points at a closed file.
                self.debug_csv = None
        return routine.rules

    def _write_debug_row(self, row):
        """Write ``row`` to the debug CSV, if one is currently open."""
        writer = getattr(self, "debug_csv", None)
        if writer is not None:
            writer.writerow(row)

    def _append_chain(self, routine, target, postcontext, lookups):
        """Append a chaining rule for ``target`` followed by an initial and ``postcontext``."""
        routine.rules.append(
            fontFeatures.Chaining(
                target,
                postcontext=[self.inits] + postcontext,
                lookups=lookups,
            )
        )

    def _append_sequence_rules(self, routine, binned_medis, binned_finas, blockers):
        """Append to ``routine`` one dispatch rule per glyph-group sequence."""
        # We will build our word sequences, from longest to shortest.
        # `i` will count medial and final glyphs, not including the
        # initial glyph, which is why we go down to zero.
        for i in range(MAXIMUM_WORD_LENGTH, -1, -1):
            postcontext_options = [binned_finas] + [binned_medis] * i
            warnings.warn("Length " + str(i))
            # This iterator returns all sequences of glyph groups.
            # For example, when `i` is 2 it will return
            #    binned_finas[0] binned_medis[0] binned_medis[0]
            #    binned_finas[0] binned_medis[0] binned_medis[1]
            #    binned_finas[0] binned_medis[0] binned_medis[2]
            #    binned_finas[0] binned_medis[1] binned_medis[0]
            #    binned_finas[0] binned_medis[1] binned_medis[1]
            #    ...
            #    binned_finas[2] binned_medis[2] binned_medis[2]
            for postcontext_plus_rise in product(*postcontext_options):
                # Each group is a two-element tuple: the glyphs in
                # the group and the median rise for each group. By
                # summing the second element of each group, we get
                # the height of this sequence.
                word_tail_rise = quantize(
                    sum(x[1] for x in postcontext_plus_rise), RISE_QUANTIZATION
                )
                if word_tail_rise < 0:
                    continue
                # And by reading the first element, we get the glyphs
                # involved.
                postcontext = [x[0] for x in reversed(postcontext_plus_rise)]
                # Everything at or above the maximum shares one table.
                word_tail_rise = min(word_tail_rise, MAXIMUM_RISE)
                if i == MAXIMUM_WORD_LENGTH:
                    # Drop the fina, so that we match all sequence
                    # starting with these glyphs.
                    postcontext.pop()

                # The right hand side of our glyph pair
                target = [self.isols_finas]
                lookups = [[self.generate_kern_table_for_rise(word_tail_rise)]]

                # Are there any blocking final glyphs in this sequence?
                do_blockers = any(blocker in postcontext[-1] for blocker in blockers)
                if do_blockers:
                    # If so, remove them from the group and handle them later;
                    # by unconditionally separating them from the group, we
                    # keep the groups constant across the whole lookup, which
                    # allows them to be represented as a format 2 lookup
                    # which is very efficient.
                    # dict.fromkeys de-duplicates while keeping a stable order.
                    postcontext[-1] = list(
                        dict.fromkeys(
                            g for g in postcontext[-1] if g not in blockers
                        )
                    )

                # Call the appropriate kern table for this sequence
                self._append_chain(routine, target, postcontext, lookups)

                # We now deal with blocking glyphs. If the sequence length
                # is 1 (init + final), skip it; otherwise, add it.
                if len(postcontext) > 1 and do_blockers:
                    postcontext[-1] = blockers
                    self._append_chain(routine, target, postcontext, lookups)

                if word_tail_rise >= 400 and i > 4:
                    # HACK
                    # This has to be done separately to make the classes work
                    postcontext[-1] = ["BARI_YEf1"]
                    self._append_chain(routine, target, postcontext, lookups)

        # Finally, kern isolates against each other.
        target = [self.isols_finas]
        lookups = [[self.generate_kern_table_for_rise(0)]]
        routine.rules.append(
            fontFeatures.Chaining(target, lookups=lookups, postcontext=[self.isols])
        )

    def master_for_location(self, location):
        """Return the font master whose location items equal ``location``.

        Args:
            location: A tuple of ``(axis, value)`` items, as used for the keys
                of ``self.distance_at_closest.values``.

        Raises:
            ValueError: If no master sits at that location.
        """
        masters = self.parser.font.masters
        this_master = [m for m in masters if tuple(m.location.items()) == location]
        if not this_master:
            raise ValueError(
                f"Could not find master for location {location};"
                f" master locations were: {[m.location for m in masters]}"
            )
        return this_master[0]

    def determine_kern_cached(self, glyph1, glyph2, height):
        """Return the variable kern between ``glyph1`` and ``glyph2`` at ``height``.

        The heavy lifting is done in kerndeterminer; this method only
        orchestrates the handling of variable font masters, asking for one
        kern per location of the target distance. Despite the name, no
        caching happens here.
        """
        # A maxtuck of zero falls back to 0.4.
        maxtuck = self.maxtuck or 0.4
        variable_kern = VariableScalar()
        variable_kern.axes = self.parser.font.axes
        for location, targetdistance in self.distance_at_closest.values.items():
            master = self.master_for_location(location)
            kern = self.kerner.determine_kern(
                glyph1,
                glyph2,
                master.name.get_default(),
                targetdistance,
                height,
                maxtuck,
            )
            variable_kern.values[location] = kern
        return variable_kern

    def ink_to_ink_at(self, rise):
        """Return the (cached) cross-space kerning routine for ``rise``.

        For every pair of a final/isolate glyph and an initial/isolate glyph
        the kern is the tapered width of ``space.urdu`` minus the two facing
        side bearings (negative bearings count as zero), then cleaned up by
        ``zero_out_nonnegative_and_quantize``.
        """
        if rise in self.ink_to_ink_routines:
            return self.ink_to_ink_routines[rise]
        # Taper distance based on rise to make it visually equal!
        taper = taper_schedule(rise)
        ink_to_ink = fontFeatures.Routine(f"ink_to_ink_{rise}", flags=0x8 | 0x4)
        font = self.parser.font
        lefts = self.inits + self.isols
        for right in self.isols_finas:
            for left in lefts:
                kern = VariableScalar()
                kern.axes = font.axes
                for location in self.distance_at_closest.values:
                    master = self.master_for_location(location)
                    space_width = master.get_glyph_layer("space.urdu").width
                    right_of_left = max(master.get_glyph_layer(left).rsb, 0)
                    left_of_right = max(master.get_glyph_layer(right).lsb, 0)
                    dist = int(
                        (space_width * taper) - (right_of_left + left_of_right)
                    )
                    kern.values[location] = dist
                kern = zero_out_nonnegative_and_quantize(kern, KERN_QUANTIZATION)
                self._write_debug_row(["ink_to_ink", rise, left, right, str(kern)])
                if kern.values:
                    if not self.variable:
                        kern = kern.default
                    ink_to_ink.rules.append(
                        fontFeatures.Positioning(
                            [[right], [left]],
                            [
                                fontFeatures.ValueRecord(),
                                fontFeatures.ValueRecord(xAdvance=kern),
                            ],
                        )
                    )
        self.ink_to_ink_routines[rise] = self.parser.fontfeatures.referenceRoutine(
            ink_to_ink
        )
        return self.ink_to_ink_routines[rise]

    def generate_kern_table_for_rise(self, rise):
        """Return the (cached) dispatch routine holding the kerns for ``rise``.

        ``rise`` is rounded to a multiple of ``RISE_QUANTIZATION`` first, so
        every height that lands on the same step shares one routine.
        """
        # Quantize before consulting the cache: entries are stored under the
        # quantized value, so looking up the raw value could miss and rebuild
        # an identical table.
        rise = quantize(rise, RISE_QUANTIZATION)
        if rise in self.kern_at_rise:
            return self.kern_at_rise[rise]
        kerntable = {}
        print(f"Generating table for rise {rise}", file=sys.stderr)
        # At the baseline, the left glyph of the sequence is all the
        # isolates and initials; but if there is a rise, we must
        # have seen a medial/final before it so we ignore the isolates.
        if rise > 0:
            ends = self.inits
        else:
            ends = self.inits + self.isols

        # So this is easy; we just go through every combination and
        # determine the kern.
        with tqdm.tqdm(total=len(ends) * len(self.isols_finas), miniters=30) as pbar:
            for end_of_previous_word in self.isols_finas:
                kerntable[end_of_previous_word] = {}
                for initial in sorted(
                    ends
                ):  # initial of "long" sequence, i.e. left glyph
                    logger.info("Left glyph: %s", initial)
                    logger.info("Right glyph: %s", end_of_previous_word)
                    kern = self.determine_kern_cached(
                        initial, end_of_previous_word, height=rise
                    )
                    logger.info(
                        "%s - %s @ %i : %s", initial, end_of_previous_word, rise, kern
                    )
                    # Only record a kern if we are actually bringing two glyphs closer.
                    kern = zero_out_nonnegative_and_quantize(kern, KERN_QUANTIZATION)
                    self._write_debug_row(
                        ["kern", rise, initial, end_of_previous_word, str(kern)]
                    )
                    if kern.values:
                        if not self.variable:
                            kern = kern.default
                        kerntable[end_of_previous_word][initial] = kern
                    pbar.update(1)

        # Once we've done so, we stick it in a pair positioning routine.
        kernroutine = fontFeatures.Routine(
            rules=[],
            name=f"kern_at_{rise}",
        )
        kernroutine.flags = 0x08 | 0x04
        abovemarks = self.parser.fontfeatures.namedClasses["all_above_marks"]
        kernroutine.markFilteringSet = abovemarks

        for left, kerns in kerntable.items():
            for right, value in kerns.items():
                kernroutine.rules.append(
                    fontFeatures.Positioning(
                        [[left], [right]],
                        [
                            fontFeatures.ValueRecord(),
                            fontFeatures.ValueRecord(xAdvance=value),
                        ],
                    )
                )
        kernroutine = self.parser.fontfeatures.referenceRoutine(kernroutine)

        # This kern routine is going to dispatch differently depending on
        # a) height and b) whether or not there is a space.
        dispatch = fontFeatures.Routine(name=f"dispatch_{rise}", flags=0x8)
        ink_to_ink_routine = self.ink_to_ink_at(rise)
        if ink_to_ink_routine.routine.rules:
            dispatch.rules.append(
                fontFeatures.Chaining(
                    [self.isols_finas, ["space.urdu"], ends],
                    lookups=[[ink_to_ink_routine], [], []],
                )
            )
        if kernroutine.routine.rules:
            dispatch.rules.append(
                fontFeatures.Chaining(
                    [self.isols_finas, ends], lookups=[[kernroutine], [], []]
                )
            )
        self.kern_at_rise[rise] = dispatch
        return dispatch
# This is just a generic version of the above.
class AtHeight(FEZVerb):
    """Chain to a named routine for glyph sequences within a height range.

    Implements ``AtHeight <units1>-<units2> <routine>``: every glyph-group
    sequence whose quantized height lies between the two bounds (inclusive)
    gets a chaining rule calling the routine.
    """

    def action(self, args):
        """Build the dispatching routine.

        Args:
            args: The lower bound, the upper bound and the name of the
                routine to chain to, in that order.

        Returns:
            A one-element list holding the generated routine.
        """
        lower_arg, upper_arg, routine_name = args
        height_lower = lower_arg.resolve_as_integer()
        height_upper = upper_arg.resolve_as_integer()
        target_routine = self.parser.fontfeatures.routineNamed(routine_name)

        named_classes = self.parser.fontfeatures.namedClasses
        self.inits = named_classes["inits"]
        medis = named_classes["medis"]
        isols = named_classes["isols"]
        finas = named_classes["finas"]
        self.isols_finas = isols + finas

        # Cluster medials and finals by rise, exactly as the kerning verb does.
        binned_medis = bin_glyphs_by_metric(
            self.parser.font, medis, "rise", bincount=ACCURACY1
        )
        binned_finas = bin_glyphs_by_metric(
            self.parser.font, finas, "rise", bincount=ACCURACY1
        )

        routine = fontFeatures.Routine(
            name=f"At_{height_lower}_{height_upper}_{target_routine.name}"
        )
        routine.flags = 0x04 | 0x08

        # Longest sequences first; the count covers medials only, the final
        # group is always present.
        for medial_count in reversed(range(MAXIMUM_WORD_LENGTH + 1)):
            for groups in product(binned_finas, *([binned_medis] * medial_count)):
                # Each group is (glyphs, rise); the summed rises give the
                # height of the sequence.
                height = quantize(
                    sum(group[1] for group in groups), RISE_QUANTIZATION
                )
                if not (height_lower <= height <= height_upper):
                    continue
                # Groups were generated final-first; flip them into text order.
                glyph_sequence = [group[0] for group in reversed(groups)]
                target = [self.isols_finas, self.inits]
                # Only the first target slot calls the routine.
                lookups = [[target_routine]]
                lookups.extend([None] * (len(target) - 1))
                routine.rules.append(
                    fontFeatures.Chaining(
                        target,
                        postcontext=glyph_sequence,
                        lookups=lookups,
                    )
                )
        return [routine]