//
//
// svgSymbolText.c
// Engine
//
// Created by Michael Beeson on 1/31/24.
// 2.14.24 added viewBox in symbolTextElement
// 2.19.24 added code around line 417 to not count a letter as alone if it's preceded by UTF-8, as in \'e\'en; and code to deal with log(b,z)
// 2.20.24 added c+=3 at line 543
// 2.26.24 changed code at the stopping condition of svgSymbolText, which
// was not updating next properly.
// 3.12.24 modified SymbolText to use correct fill and stroke
// 4.6.24 made svg_colorstring handle alpha
// 4.8.24 added AVG_CHARWIDTH
// 4.9.24 modified svgTextInvisibleElement to NOT leave margins of 8 above and below.
// 4.9.24 corrected computation of width in symbolText at "NO"
// 4.19.24 adjusted height in svgSymbolTextInvisibleElement to allow more for integral signs.
// 4.20.24 made svgSymbolTextInvisibleElement cope with $$.
// 4.21.24 made AVG_CHARWIDTH a decimal instead of an int
// 4.21.24 corrected svgSymbolText at "NO!"
// 4.21.24 added \neq to TeXtoUTF8
// 4.22.24 modified svgSymbolText to handle x^(?n)
// 4.24.24 wrote svgSymbolTextElement_aux, thus enabling symbolTextInvisibleElement
// and SymbolTextElement to handle display math after regular text.
// 4.24.24 added \supn to TeXtoUTF8
// 4.30.24 corrected svgSymbolTextElement_aux at "not svgOut"
// 4.30.24 added code around line 832 to get i^(4n), i^(4n+1), etc. to have the 4 in Roman
// 4.30.24 added +12 to the width calculation to prevent a+b=b+a truncating.
// 4.30.24 at line 723, strcat instead of mistaken strcpy
// 4.30.24 added two lines at "coming out of exponent now"
// 5.1.24 added code near 'buffertemp'
// 5.1.24 made it handle ..^\\lim v
// 5.2.24 changed strcpy to strcat at line 545
// 5.11.24 introduced gap in symbolTextElement_aux
// 5.16.24 added the second clause under "rule 4"
// added a few lines around line 1038
// 5.18.24 changed the placement of 'firstflag = 0' in svgSymbolText
// 5.30.24 added Rule 8 in SymbolText
// 6.13.24 modified Rule 8 to include '^'
// 7.8.24 corrections to SymbolText on subscripts
// 7.23.24 added code line 653, see (a/b)
// 7.26.24 made it accept "
// 10.1.24 made svgSymbolText accept '&'
// 11.7.24 made svgSymbolText translate '<' and '<=' to < and ≤
// 11.8.24 changed c[1] to c[0] at line 590
// 11.8.24 added code at buf2 to prevent "is," coming out in italics
// 11.8.24 more code near "rule 8" to get initial a=0 in italics
// 11.8.24 more code to write </tspan> after every subscript
// 11.8.24 added code to make sure height is at least 20 if there's a subscript.
// 11.11.24 added code to accept 2^(1/2) and 2^(5/2)
// 11.12.24 added code to make sure height is at least 18 if there's a superscript.
// 11.14.24 added \ne to TeXtoUTF8
// 11.14.24 added k>=2 etc. near c[2]
// 11.15.24 added the +1 in malloc(strlen(reason)+1);
// 11.18.24 added width += termwidth+gap;
// 11.18.24 added code to increase *width on input \\to because the arrow is extra long
// 1.14.25 line 1137, uncommented code to write /tspan. (Why/when was it commented out?)
// 1.14.25 removed code setting min height with superscript
// 1.29.25 added ++c at line 1118
// 2.16.25 charwidth needs to be a double in two places, since AVG_CHARWIDTH is a double.
#include <assert.h>
#include <string.h>
#include <stddef.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h> // for malloc
#include "svgSymbolText.h"
#include "preferences.h" // get_selected_language()
#include "english.h" // two_letter_word
#include "terms.h"
#include "display.h"
#include "charstr.h"
#include "dispfunc.h"
#include "display1.h"
#include "parser.h" /* MakeSelectorMenu */
#include "pcontrol.h" /* get_parser_flags */
#include "defns.h" /* needed by vaux.h */
#include "vaux.h" /* parameter, needed by termtoSVG.h */
#include "termtoSVG.h" /* MakeSelectorMenu */
#define BUFSIZE 200 /* max length in bytes of contiguous non-space characters this function can handle */
/* BUFSIZE needs to be big to handle Chinese, which just has character after character with no spaces. */
// Scratch buffer in which svgSymbolText collects the name of a backslash code
// or a run of consecutive letters before deciding how to render it.
static char buffer[BUFSIZE];
#define AVG_CHARWIDTH 7.7 // we need the precision, so it can't be an int
// any smaller, and "the problem" is slightly truncated
// AVG_CHARWIDTH is the factor by which a width counted in characters is
// multiplied to obtain a width in CSS pixels (16 is used instead when the
// selected language is CHINESE).
// Forward declarations of helpers that are defined later in this file.
static int is_greek(char *t);
static void svgSymbolTextElement_aux(char *reason, // reason or menu string with TeX-like abbreviations
char *svgOut, // where to write the SVG code
char *id, // id for the resulting SVG element, e.g. reason3
char *class, // if not NULL, use it for the class of the SVG element
int outbuffersize, // how much space is available
int color, // the color to use
int x, // x-coordinate to use
int y, // y-coordinate to use
char *display); // the CSS display property, "none" or "block"
/*__________________________________________________________________*/
// Classify one byte of a UTF-8 encoded string.
// Returns
//    1          if c is an ASCII letter,
//    0          if c is any other ASCII byte, or a byte that cannot occur
//               in UTF-8 (0xF8..0xFF),
//    2, 3 or 4  if c is the lead byte of a 2-, 3- or 4-byte sequence
//               (the value is the length of the sequence in bytes),
//    5          if c is a continuation byte (10xxxxxx).
// The ASCII result is normalized to exactly 1: svgSymbolText adds the return
// value to a character count, and the C standard only promises that
// isalpha() returns "nonzero" for letters (some libraries return e.g. 1024).
int isalpha8(char c) {
    unsigned char b = (unsigned char)c;    // avoid sign-extension of bytes >= 0x80
    if ((b & 0x80) == 0) return isalpha(b) ? 1 : 0;  // 0xxxxxxx, ASCII
    if ((b & 0xE0) == 0xC0) return 2;      // 110xxxxx, start of 2-byte char
    if ((b & 0xF0) == 0xE0) return 3;      // 1110xxxx, start of 3-byte char
    if ((b & 0xF8) == 0xF0) return 4;      // 11110xxx, start of 4-byte char
    if ((b & 0xC0) == 0x80) return 5;      // 10xxxxxx, continuation byte
    return 0;  // not a valid UTF-8 start byte or continuation byte
}
/*__________________________________________________________________*/
static char *TeXtoUTF8(char *in)
/* Translate the name of a TeX-like code (without its leading backslash,
   e.g. "sqrt" for \sqrt) into the UTF-8 bytes of the symbol it stands for.
   The name must match a table entry exactly.
   An unrecognized name trips assert(0); when assertions are compiled out
   the empty string "" is returned instead.
*/
{
    static const struct {
        const char *name;   /* code name as written after the backslash */
        char *utf8;         /* UTF-8 encoding of the replacement symbol */
    } codes[] = {
        { "alpha",       "\xCE\xB1"     },
        { "beta",        "\xCE\xB2"     },
        { "gamma",       "\xCE\xB3"     },
        { "pi",          "\xCF\x80"     },
        { "sum",         "\xE2\x88\x91" },
        { "sigma",       "\xCF\x83"     },
        { "mu",          "\xCE\xBC"     },
        { "littlegamma", "\xCE\xB3"     },   /* same bytes as "gamma" */
        { "theta",       "\xCE\xB8"     },
        { "delta",       "\xCE\xB4"     },
        { "infinity",    "\xE2\x88\x9E" },
        { "infty",       "\xE2\x88\x9E" },
        { "phi",         "\xCF\x86"     },
        { "in",          "\xE2\x88\x8A" },   /* U+220A is used, not U+2208 ("\xE2\x88\x88") */
        { "lambda",      "\xCE\xBB"     },
        { "pm",          "\xC2\xB1"     },
        { "ge",          "\xE2\x89\xA5" },   /* greater-than-or-equal-to */
        { "le",          "\xE2\x89\xA4" },   /* less-than-or-equal-to */
        { "int",         "\xE2\x88\xAB" },
        { "integral",    "\xE2\x88\xAB" },
        { "deg",         "\xC2\xB0"     },
        { "times",       "\xC2\xB7"     },   /* rendered as a middle dot, like "cdot" */
        { "cdot",        "\xC2\xB7"     },
        { "sqrt",        "\xE2\x88\x9A" },
        { "root",        "\xE2\x88\x9A" },
        { "vert",        "\xC2\xA3"     },   /* NOTE(review): these bytes encode U+00A3 (pound sign) -- confirm this is intended */
        { "onehalf",     "\xC2\xBD"     },
        { "onequarter",  "\xC2\xBC"     },
        { "to",          "\xE2\x86\x92" },   /* rightwards arrow */
        { "zeta",        "\xCE\xB6"     },
        { "neq",         "\xE2\x89\xA0" },
        { "supn",        "\xE2\x81\xBF" },   /* superscript n */
        { "ne",          "\xE2\x89\xA0" },
    };
    size_t count = sizeof codes / sizeof codes[0];
    size_t i = 0;
    while (i < count) {
        if (strcmp(in, codes[i].name) == 0)
            return codes[i].utf8;
        ++i;
    }
    assert(0);
    return "";   /* signals an unrecognized backslash code */
}
/*______________________________________________________________*/
void svgSymbolTextElement(
    char *reason,       // reason or menu string with TeX-like abbreviations
    char *svgOut,       // output buffer that receives the SVG code
    char *id,           // id attribute of the SVG element, e.g. reason3
    char *class,        // class attribute of the SVG element, or NULL for none
    int outbuffersize,  // number of bytes available at svgOut
    int color,          // color of the text
    int x,              // x-coordinate of the element
    int y)              // y-coordinate of the element
/* Produce a visible, absolutely positioned SVG element for 'reason'.
   TeX-like codes such as \sqrt or x^n are replaced by UTF-8 and SVG markup.
   All the work is done by svgSymbolTextElement_aux, which is called here
   with the display property "block".
*/
{
    svgSymbolTextElement_aux(reason, svgOut, id, class, outbuffersize,
                             color, x, y, "block");
}
/*______________________________________________________________*/
void svgSymbolTextInvisibleElement(
    char *reason,       // reason or menu string with TeX-like abbreviations
    char *svgOut,       // output buffer that receives the SVG code
    char *id,           // id attribute of the SVG element, e.g. reason3
    char *class,        // class attribute of the SVG element, or NULL for none
    int outbuffersize,  // number of bytes available at svgOut
    int color,          // color of the text
    int x,              // x-coordinate of the element
    int y)              // y-coordinate of the element
/* Produce an absolutely positioned SVG element for 'reason' that is
   initially invisible.
   TeX-like codes such as \sqrt or x^n are replaced by UTF-8 and SVG markup.
   All the work is done by svgSymbolTextElement_aux, which is called here
   with the display property "none" (i.e. display:none).
*/
{
    svgSymbolTextElement_aux(reason, svgOut, id, class, outbuffersize,
                             color, x, y, "none");
}
/*______________________________________________________________*/
static void lower_text(char *buffer, int deltay)
/* Add deltay to every two-digit y-coordinate in buffer, editing in place.
   Every occurrence of  y="NN"  (exactly two digits followed by a closing
   quote) is rewritten; for example y="13" becomes y="21" when deltay = 8.
   Occurrences with one digit or with three or more digits are left alone.
   Note that the search is purely textual, so any attribute whose name ends
   in y (followed by ="NN") is matched too.
   Because the replacement must occupy exactly two characters, the new value
   is clamped to the range 00..99; this keeps the attribute a valid number
   even if deltay is negative or large.
   A NULL buffer is ignored.
*/
{
    if (buffer == NULL)
        return;
    char *p = buffer;
    while ((p = strstr(p, "y=\"")) != NULL) {
        char *digits = p + 3;   // first character after y="
        // <ctype.h> functions require an unsigned char value
        if (isdigit((unsigned char)digits[0]) &&
            isdigit((unsigned char)digits[1]) &&
            digits[2] == '\"') {
            int value = (digits[0] - '0') * 10 + (digits[1] - '0');
            value += deltay;
            if (value < 0)
                value = 0;      // cannot write a sign in two characters
            if (value > 99)
                value = 99;     // cannot write a third digit in place
            digits[0] = (char)('0' + value / 10);   // tens place
            digits[1] = (char)('0' + value % 10);   // units place
            p = digits + 3;     // continue after the closing quote
        } else {
            p++;                // not a two-digit y value; keep searching
        }
    }
}
/*______________________________________________________________*/
static void svgSymbolTextElement_aux(char *reason, // reason or menu string with TeX-like abbreviations
                                     char *svgOut, // where to write the SVG code
                                     char *id, // id for the resulting SVG element, e.g. reason3
                                     char *class, // if not NULL, use it for the class of the SVG element
                                     int outbuffersize, // how much space is available
                                     int color, // the color to use
                                     int x, // x-coordinate to use
                                     int y, // y-coordinate to use
                                     char *display) // the display property, "none" or "block"
/* Write to svgOut a complete, absolutely positioned <svg> element for 'reason'.
   'reason' is ordinary text with TeX-like abbreviations such as \sqrt or x^n.
   It may begin with display math delimited by $$...$$, and the text after
   that may itself contain one more $$...$$ part.  Ordinary text is rendered
   by svgSymbolText; display math is parsed with bparse and rendered with
   termtoColoredSVG.
   'display' is inserted verbatim as the CSS display property, so "none"
   makes the element invisible.
   The opening <svg> tag is first written with placeholder sizes (width 800
   and, when class is given, height 100).  Once the real size is known the
   placeholders are overwritten in place with three-digit values.
   Insufficient output space, unterminated $$ and unparsable display math
   are reported with assert(0).
*/
{
    int width = 0;
    int height = 16;
    int level = 0;
    int delta = 0;
    int gap = 0;  // nonzero when reason begins with a displayed term:
                  // the space to leave after that term
    int termheight = 0;
    int termwidth = 0;
    if(strstr(reason,"\\int"))
    {   delta = 4;
        height += delta;  // the one-line integral sign is tall, and we don't want it truncated
    }
    // First we start the <svg> element:
    if(class == NULL)
        snprintf(svgOut, outbuffersize, "<svg id=\"%s\" width=\"%d\" height=\"%d\" style=\"display:%s;position:absolute;left: %dpx; top: %dpx;\" xmlns=\"http://www.w3.org/2000/svg\">\n", id, 800,height,display,x,y); // 800 has to be replaced when we know the width
    else // 3.14.25 removed \n in the following line and added px here and just above
        snprintf(svgOut, outbuffersize, "<svg id=\"%s\" class=\"%s\" width=\"%d\" height=\"%d\" style=\"display:%s;position:absolute;left: %dpx; top: %dpx;\" xmlns=\"http://www.w3.org/2000/svg\">", id, class, 800,100,display,x,y); // 800 has to be replaced when we know the width, and 100 when we know the height.
    size_t byteswritten = strlen(svgOut);
    size_t byteswritten2 = 0;
    size_t byteswritten3 = 0;
    size_t byteswritten4 = 0;
    char *displaymarker = NULL;
    if(byteswritten + strlen(reason) + 100 >= outbuffersize)
        assert(0);
    outbuffersize -= byteswritten;
    // It is allowed that the input text begins with $$...$$, and that part
    // should be displayed.
    if(reason[0] == '$' && reason[1] == '$')
    {
        gap = 12;  // space after the displayed term
        char *p = strstr(reason+2, "$$");
        if(p == NULL)
            assert(0);
        char buffer[256];
        int j = 0;
        char *marker2;
        // Copy the text between the dollar signs.  The copy is bounded so
        // that over-long display math cannot overrun the local buffer.
        assert(p - (reason+2) < (ptrdiff_t) sizeof(buffer));
        for(marker2 = reason+2; marker2 < p && j < (int) sizeof(buffer) - 1; ++marker2,++j)
            buffer[j] = *marker2;
        buffer[j] = '\0';
        int err;
        char *rest;
        term t,bt;
        err = bparse(get_parser_flags(),&t,buffer,&rest);
        if(err)
            assert(0);
        // we need the height, so
        bblock(t,&bt);
        termheight = HEIGHT(bt);
        termwidth = (int) WIDTH(bt);
        level = LEVEL(bt);
        destroy_bblocked_term(bt);
        termtoColoredSVG(t,svgOut+byteswritten,outbuffersize,8,0,color);
        byteswritten2 = strlen(svgOut+byteswritten);
        if(byteswritten + byteswritten2 + strlen(reason) + 100 >= outbuffersize)
            assert(0);
        outbuffersize -= byteswritten2;
        reason = p+2;  // past the closing $$
        while (*reason == ' ')
            ++ reason;  // skip spaces
    }
    // Now there either was no initial $$, or we're past its closing $$ as well
    if(*reason != '\0')
    {   displaymarker = strstr(reason,"$$");  // start of next display math if there is one
        int y3 = level>=10 ? level-8: delta;
        if(displaymarker)
        {   /* for example, reason = "cancel $$2^(2^3)$$", displaymarker points to the first $$ */
            /* so we allow reason to begin or end with display math */
            /* reason is possibly declared const so can't replace displaymarker temporarily */
            char *buffertemp = malloc(strlen(reason)+1);
            // I used malloc rather than mallocate, as this code is used
            // in svgTest.c where no MathXpert heap is present.
            if(buffertemp != NULL)
            {   strcpy(buffertemp,reason);
                char *marker2 = strstr(buffertemp,"$$");
                *marker2 = '\0';  // buffertemp is not constant so this is OK
                // output the non-display-math part before the display math
                svgSymbolText(buffertemp,svgOut+byteswritten+byteswritten2,outbuffersize,color,termwidth+gap,y3,12,&width);
                width += termwidth+gap;
                free(buffertemp);
            }
            else
                assert(0);  // out of memory; without assertions the leading text is omitted
        }
        else
        {   gap = (termwidth == 0) ? 0: 16;
            svgSymbolText(reason,svgOut+byteswritten+byteswritten2,outbuffersize,color,termwidth+gap,y3,12,&width);
            width += termwidth+gap;
        }
    }
    if(displaymarker)
    {
        byteswritten3 = strlen(svgOut + byteswritten + byteswritten2);
        if(byteswritten + byteswritten2 + byteswritten3 + strlen(reason) + 100 >= outbuffersize)
            assert(0);
        outbuffersize -= byteswritten3;
        char *p = strstr(displaymarker+2, "$$");
        if(p == NULL)
            assert(0);
        char buffer[256];
        int j = 0;
        char *marker2;
        // Bounded copy of the display math, as for the leading $$...$$ part.
        assert(p - (displaymarker+2) < (ptrdiff_t) sizeof(buffer));
        for(marker2 = displaymarker+2; marker2 < p && j < (int) sizeof(buffer) - 1; ++marker2,++j)
            buffer[j] = *marker2;
        buffer[j] = '\0';
        int err;
        char *rest;
        term t,bt;
        err = bparse(get_parser_flags(),&t,buffer,&rest);
        if(err)
            assert(0);
        // we need the height, so
        bblock(t,&bt);
        termheight = HEIGHT(bt);
        termwidth = (int) WIDTH(bt);
        level = LEVEL(bt);
        destroy_bblocked_term(bt);
        int textlevel = 9;  // the "level line" of normal text ?
        // can't decrease it or descenders like 'g' are truncated
        int deltay = level - textlevel;
        // Now increase all the y-coordinates of the <text> element
        // produced so far, so as to lower e.g. "cancel" to line up with the cancelled term.
        lower_text(svgOut,deltay);
        double charwidth = AVG_CHARWIDTH;
        if(get_selected_language() == CHINESE)
            charwidth = 16;
        termtoColoredSVG(t,svgOut+byteswritten+byteswritten2+byteswritten3,outbuffersize,
                         (int)(width*charwidth + 0.5),  // round to an int
                         0,
                         color
                        );
        byteswritten4 = strlen(svgOut+byteswritten+byteswritten2+byteswritten3);
        if(byteswritten + byteswritten2 +byteswritten3 + byteswritten4 + strlen(reason) + 100 >= outbuffersize)
            assert(0);
        outbuffersize -= byteswritten4;
    }
    // We will replace the placeholder 800 with the actual width.
    // The search is anchored on the attribute itself, because the digits 800
    // could also occur in the id or class, which are written before it.
    // Note: we did not do svgOut += byteswritten, so we still have the original svgOut at hand
    char *marker = strstr(svgOut,"width=\"800\"");
    // But 'width' is in characters while termwidth is in papyrus coords
    double charwidth = AVG_CHARWIDTH;
    if(get_selected_language() == CHINESE)
        charwidth = 16;
    width = (int)(width * charwidth + 0.5) + termwidth + 20;
    // Now, width is in CSS pixels = papyrus coordinates
    char temp[20];
    assert(sizeof(temp) == 20);
    if(width >= 1000)
    {
        printf("input more than 1000 pixels wide!\n");
        printf("%s\n",reason);  // but go on anyway, it's OK
        width = 999;  // it won't matter that this is shorter than reality
    }
    if(marker)  // NULL only if the opening tag was truncated
    {   marker += 7;  // skip  width="  to reach the three digits
        snprintf(temp,sizeof(temp),"%03d",width);
        // if being used for a displayed reason string, need 3 characters so it can be
        // made larger without disturbing nearby characters
        for(int k = 0;k<3;k++)
            marker[k] = temp[k];
        // The height placeholder 100 exists only when a class was given.
        // Anchoring on the attribute (and starting after the width) ensures
        // that neither a width of 100 nor a coordinate such as left: 100px
        // is mistaken for it.
        marker = strstr(marker+3,"height=\"100\"");
        if(marker)
        {   marker += 8;  // skip  height="  to reach the three digits
            if(termheight >= height)
                height = termheight;
            if(strstr(svgOut,"shift:sub"))
            {   // there is a subscript, height needs to be big enough
                if(height < 20)
                    height = 20;
            }
#if 0
            // actually 16 looks fine
            if(strstr(svgOut,"shift:sup"))
            {   // there is a superscript, height needs to be big enough
                if(height < 18)
                    height = 18;
            }
#endif
            snprintf(temp,sizeof(temp),"%03d",height);
            // replace 100 with the actual height
            for(int k = 0;k<3;k++)
                marker[k] = temp[k];
        }
    }
    strcat(svgOut,"</svg>\n");
}
/*______________________________________________________________*/
static int pointstoUTF8(const char *c, int *bytes) {
    // Test whether c points to the start of a well-formed multi-byte
    // (2-, 3- or 4-byte) UTF-8 sequence.
    // Returns 1 and stores the sequence length in *bytes if so.
    // Returns 0 and leaves *bytes untouched otherwise, i.e. when c[0] is an
    // ASCII byte (including the terminating NUL), a continuation byte, an
    // invalid lead byte, or a lead byte not followed by the required number
    // of continuation bytes.
    // Checking the continuation bytes matters to the caller, which copies
    // *bytes bytes and advances by that amount: a truncated sequence just
    // before the terminating NUL must not make it step past the terminator.
    unsigned char lead = (unsigned char)c[0];
    int len;
    if ((lead & 0xE0) == 0xC0)
        len = 2;            // 110xxxxx
    else if ((lead & 0xF0) == 0xE0)
        len = 3;            // 1110xxxx
    else if ((lead & 0xF8) == 0xF0)
        len = 4;            // 11110xxx
    else
        return 0;           // ASCII, continuation byte, or invalid lead byte
    // Each following byte must look like 10xxxxxx.  NUL is not of that form,
    // so the scan stops at the end of the string without reading beyond it.
    for (int k = 1; k < len; k++) {
        if (((unsigned char)c[k] & 0xC0) != 0x80)
            return 0;
    }
    *bytes = len;
    return 1;
}
/*______________________________________________________________*/
/* Names (without the backslash) of the Greek letters that are set in italics. */
static char *greeks[] = {
    "alpha", "beta", "gamma", "delta", "psi", "phi", "varphi",
    "Psi", "chi", "xi", "epsilon", "sigma", "mu", "theta", "lambda"
};
static int is_greek(char *t)
/* Return 1 if t is exactly one of the names listed in greeks[], that is,
   the name of a Greek character that should be rendered in italics.
   Return 0 otherwise.  The comparison is case-sensitive.
*/
{
    char **end = greeks + sizeof(greeks) / sizeof(char *);
    char **name;
    for (name = greeks; name != end; ++name) {
        if (strcmp(t, *name) == 0)
            return 1;
    }
    return 0;
}
/*______________________________________________________________*/
// Letters that svgSymbolText (rule 6) treats as a one-letter word rather than
// a variable when they stand alone: a single letter found in this string is
// printed in roman, any other single letter in italics.
// Lower-case 'u' is deliberately absent, although upper-case 'U' is present.
static char *vowels = "aeioAEIOU"; // but y doesn't count as a vowel here and neither does u
/*______________________________________________________________*/
int svgSymbolText(char *c, // reason or menu string with TeX-like abbreviations
char *svgOut, // where to write the SVG code
int outbuffersize, // how much space is available
int color, // the color to use as an RGB value
int x, // starting x-coordinate
int y, // starting y-coordinate. 0 for menus, 8 for reasons
int fontsizeIn, // in points
int *width // returned in characters, not pixels or bytes
)
/* Write one <text> element to svgOut, translating 'reason' to UTF-8 text,
Replace TeX-like codes such as \sqrt or x^n with UTF-8 and SVG,
and handle superscripts and subscripts. Changes from roman to italic,
and font size for superscripts and subscripts, are handled with <tspan>,
so we need only one <text> element.
UTF-8 text is allowed in the input, and just gets copied to the output.
At exit *width contains the number of characters written to svgOut,
not the number of bytes, i.e. UTF-8 multibyte characters are counted as one.
Return the number of bytes written.
*/
{ *width = 0;
int dollarflag = 0;
int bigfont = fontsizeIn;
int smallfont = 8;
int fontsize = bigfont;
int language_number = get_selected_language();
int firstflag = 1; /* set to zero when c[-1] is a legal reference */
int ascent = 13;
int k;
int baseline = y+ascent;
char colorbuf[32];
svg_colorstring(colorbuf, color); // removed newlines, both actual and \n in next lines 3.14.25
snprintf(svgOut, outbuffersize,"<text x = \"%d\" y=\"%d\"style=\"font-style:roman;font-size:%dpt;fill:%s;stroke:none;\">", x,baseline,fontsize,colorbuf);
int byteswritten = (int)strlen(svgOut);
outbuffersize -= byteswritten;
char *next = svgOut + byteswritten;
int bytes;
start: /* tail-recursion is implemented using goto */
if(fontsize != bigfont)
assert(0); // no recursion from exponents
fontsize = bigfont;
if (pointstoUTF8(c,&bytes))
{ for(int k = 0; k<bytes;k++)
{ next[k] = c[k];
}
next[bytes] = '\0';
next += bytes;
*width += 1; // not 2 or 3, just 1 for one unicode character
// width for Chinese is adjusted later
c += bytes;
outbuffersize -= bytes;
firstflag = 0;
goto start;
}
if(*c == '\0') // the stopping condition
{ snprintf(next, outbuffersize, "</text>"); // close the <text> element, removed final \n 3.14.25
// printf("Returning from svgSymbolText:\n"); // debug
// printf("%s with width = %d\n",svgOut,*width); // debug
bytes = (int) strlen(next);
next += bytes;
outbuffersize -= bytes;
byteswritten = (int) (next - svgOut);
return byteswritten;
}
// Check for backslash codes like \sqrt or \alpha
if(*c == '\\')
{ ++ c;
firstflag = 0;
for(k=0;k<BUFSIZE && isalpha(*c); k++)
{ buffer[k] = *c;
++c;
}
assert(k < BUFSIZE);
buffer[k] = 0;
char *temp = TeXtoUTF8(buffer);
if(temp[0])
{ // this backslash code was recognized
// Now c points to the blank after the backslash code.
/* things like \to4 can be accepted instead of
requiring \to 4. But of course \toa will be an error;
it must be written \to a
*/
if(is_greek(buffer))
{ // it must be printed in italic style so we need a <tspan>
strcpy(next,"<tspan style=\"font-style:italic\">");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
strcpy(next,temp);
*width += 1;
// c is already past this TeX code
strcat(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
goto start;
}
strcpy(next, temp);
++ *width; // by just one, even though two or three bytes are used, one character will show up on the screen,.
if(!strcmp(buffer,"to"))
++ *width; // the arrow for \\to is twice as long as a normal character.
byteswritten = (int)strlen(temp);
next += byteswritten;
outbuffersize -= byteswritten;
goto start;
}
}
// so *c is not backslash
if(*c == '$')
{ dollarflag = (dollarflag ? 0 : 1); // toggle it, so it's 1 between dollar signs
++c; // ignore dollar signs except for knowing whether we're between them
firstflag = 0;
goto start;
}
if(*c == ' ') // only print one blank, even if there are more in the text
{ strcpy(next, " ");
++ *width; // by just one, even though two or three bytes are used, one character will show up on the screen,.
++next;
--outbuffersize;
while (*c ==' ')
++c;
firstflag = 0;
goto start;
}
// put consecutive letters into buffer (including comma and apostrophe and quote marks)
int w;
for(k=0; (( w=isalpha8(c[k])) || c[k]== '\'' || c[k]== '\"' || c[k] == ',') && k < BUFSIZE; k++)
{ buffer[k] = c[k];
if(w != 5)
*width += w;
}
assert(k < BUFSIZE); /* more than this many consecutive letters or digits not allowed! */
/* BUFSIZE was 40 but Chinese overflowed it. */
buffer[k] = '\0';
if(k==1 || c[0] == ',' || c[0] == '\"' || c[0] == '\'')
{ /* one ascii letter alone, or comma, quotes, or quote.
Is it a one-letter word, or is it a variable name?
Here are the rules:
1. I, K, J, or Y immediately followed by parentheses are not variables.
2. If it's enclosed in dollar signs, it's a variable.
If not enclosed in dollar signs, then:
3. If its ascii code is > 128, it's not a variable (Greek letters should be in \TeX style)
4. If the language is English, and the letter is not 'a' or 'A', then
it's a variable.
5. If the language is French or Canadian French, the letter is a variable.
6. If preceded by a space and followed by a space or apostrophe,
if it's the first character and followed by a space or apostrophe, or if
it's the last character and preceded by a space, then:
6a. if it's not a vowel it's a variable.
6b. If it's a vowel (except lower-case "u") it's a one-letter word (not a variable).
6c. lower-case u stating alone is always a variable, as it's not a
word in any supported language.
Thus in languages other
than English and French, all one-letter vowels with spaces on
both sides, or apostrophes after, must be enclosed in dollar signs, and
in English, this is true of "a" and "A".
But by rule 6a, "n" and "m" will be variables (when standing alone).
7. If followed by a period, it's not a variable, as in (eq n. ?)
8. If followed by + or - or ^ or = or < or >
or preceded by + or - or ^ or = or < or >
it's a variable.
9. Else it's a variable, for example f(x).
*/
int var;
unsigned char xx = c[0];
if(xx == ',' || xx == '\"' || xx == '\'')
var = 0;
else if(firstflag == 0 && !dollarflag && isalpha8(c[-1]))
var = 0;
else if(c[1] == '(' && (xx == 'J' || xx == 'Y' || xx == 'I' || xx == 'Y'))
var = 0; /* rule 1 */
else if(dollarflag)
var = 1; /* rule 2 */
else if(xx > 127)
var = 0; /* rule 3 */
else if(language_number == ENGLISH && xx != 'a' && xx != 'A')
var = 1; /* rule 4 */
else if(language_number == FRENCH || language_number == CANADIAN)
var = 1; /* rule 5 */
else if(
(c[1] == 32 || (c[1] == 0 && !firstflag) || c[1] == '\'') &&
(firstflag || c[-1] == 32)
)
var = strchr(vowels,c[0]) ? 0 : 1; /* rule 6 */
else if( c[1] == '.' || c[1] == '+' || c[1] == '-' || c[1] == '^' || c[1] == '=' || c[1] == '<' || c[1] == '>' ||
c[1] == '(' || // as in a(b+c)
c[1] == '/' || // as in (a/b)
(firstflag == 0 &&
(c[-1] == '+' || c[-1] == '-' || c[-1] == '^' ||
c[-1] == '=' || c[-1] == '<' || c[-1] == '>'
)
)
)
var = 1; /* rules 7 and 8 */
else if(language_number == ENGLISH && (xx == 'a' || xx == 'A'))
var = 0; /* rule 4 */
else
var = 1; /* rule 9 */
if(var == 0)
{ // not a variable, so no <tspan> needed
strcpy(next, buffer);
++next;
--outbuffersize;
++c;
// ++*width; NO! we already incremented it when writing to buffer, line 603
goto start;
}
// now it WAS a variable, so we need a <tspan> to put it in italics
if(fontsize == smallfont)
{ // we're in an exponent; the effects of baseline-shift end with
// the tspan so we need to repeat it
strcpy(next, "<tspan style=style=\"font-style:italic;\"baseline-shift:super;font-size:8pt;dx=1;\">");
}
else
strcpy(next,"<tspan style=\"font-style:italic\">");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
strcpy(next,buffer);
++next;
++byteswritten;
// *width += 1; // NO! see above, at the previous NO
++c;
strcpy(next,"</tspan>");
if(fontsize == smallfont)
fontsize = bigfont; // coming out of exponent now.
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
goto start;
}
firstflag = 0;
if(k==2 ||
(k>= 2 && (c[2] == ',' || c[2] == '\"' || c[2] == '\''))
)
{ /* two letters alone might be both variables or they
might be a word. */
int var=0;
if(dollarflag && k==2)
{ /* we're inside dollar signs, so no words should appear; we are watching out for "ab", which counts as a word in German */
/* But we still want "ln" to appear in roman, so IT should count */
if(buffer[0] == 'l' && buffer[1] == 'n')
var = 0;
else
var = 1; // not a word, use italics
}
else
{ if(k==2)
var = two_letter_word(buffer);
else // for example buffer might contain "is,"
{ char buf2[10];
strncpy(buf2,buffer,3);
buf2[2] = 0;
var = two_letter_word(buf2);
}
if(var == 0 && c[2] == ')' && c[0] == 'i' && c[1] == 't')
{ /* sin(it) for example occurs in the complex trig formulae */
var = 1; /* not a word after all! */
}
}
if(var == 0)
{ // not a variable, so no <tspan> needed
strcpy(next, buffer);
next += k;
// *width += 2; // NO! it's already been incremented when bytes
// were written to buffer.
outbuffersize -=2;
c+= k;
goto start;
}
// now it is two variables, use italics
strcpy(next,"<tspan style=\"font-style:italic\">");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
strcpy(next,buffer);
// *width += 2; No! it's already been incremented when bytes
// were written to buffer.
c+=k;
strcat(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
goto start;
}
if(2 < k)
{ int var;
if(k==3 && (!strcmp(buffer,"abc") ||
!strcmp(buffer,"aaa") ||
!strcmp(buffer,"xyz") ||
!strcmp(buffer,"udt") ||
!strcmp(buffer,"vdt") ||
(dollarflag == 1 && buffer[1] == ',') // $log(b,x)$
)
)
var = 1; /* italic */
else
var = 0;
if(var == 0)
{ // not variables, so no <tspan> needed
strcpy(next, buffer);
next += k;
outbuffersize -=k;
// *width += k; NO! width has already been incremented, and besides k counts UTF-8 bytes
c+= k;
goto start;
}
// now it is three variables, use italics
strcpy(next,"<tspan style=\"font-style:italic\">");
byteswritten = (int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
strcpy(next,buffer);
// *width += 3; NO! see 12 lines above
strcat(next,"</tspan>");
byteswritten =(int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
c+=3;
goto start;
}
if(*c == '-' && c[1] == '>') // arrow written as "->"
// this must be caught before the '-' is interpreted as a minus
{ strcpy(next,"\xE2\x86\x92"); // UTF-8 for arrow
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
c += 2;
goto start;
}
// now for consecutive digits, including plus and minus signs
// so with this code we should be able to handle x^(2n+1)
if(isdigit(*c) || *c == '+' || *c == '-')
{ k = 0;
if(fontsize == smallfont)
{ // we're in an exponent; the effects of baseline-shift end with
// the tspan so we need to repeat it
strcpy(next, "<tspan style=style=\"font-style:roman;\"baseline-shift:super;font-size:8pt;dx=1;\">");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
k = 0;
}
while(isdigit(c[k])|| c[k] == '+' || c[k] == '-')
{ *next = c[k];
++next;
--outbuffersize;
++k;
++*width;
}
c += k;
*next='\0';
if(fontsize== smallfont)
{ strcpy(next, "</tspan>");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += k;
}
goto start;
}
if(*c == '<' && c[1] == '=')
{ strcpy(next,"≤");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
c += 2;
goto start;
}
if(*c == '>' && c[1] == '=')
{ strcpy(next,"≥");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
c += 2;
goto start;
}
if(*c == '>')
{ strcpy(next,">");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
++c;
goto start;
}
if(*c == '<')
{ strcpy(next,"<");
byteswritten= (int) strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
++c;
goto start;
}
if(*c == '+' || *c == '/' || *c == '-' || *c == '(' ||
*c == ')' || *c == '[' || *c == ']' || *c == '=' || *c == '*' ||
*c == '|' || *c == ',' || *c == '&' ||
// comma occurs in lim(x \to a, f(x)
// & occurs in an error message about illegal & signs
*c == '\'' || *c == '.' || *c == ':' || *c == '?' ||
// apostrophe and period occur in "That's the answer."
// ? occurs in "How many terms do you want to see?", as well as in menu items
*c == ';' || *c == '!' || *c == '='
// semicolon occurs in "= n pi; or just n pi "
// ! at beginning of a comment means, make it a permanent comment
// First use \"show all equations\";
)
{ *next = *c;
++c;
++*width;
--outbuffersize;
++next;
*next='\0';
if(fontsize == smallfont)
{ // coming out of exponent now
strcpy(next,"</tspan>");
fontsize = 12; // coming out of exponent now.
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
}
goto start;
}
if(*c == '^')
/* we will put the exponent in superscript */
{ if (fontsize == smallfont)
assert(0); // nested exponents are NOT allowed!
fontsize = smallfont;
++c; // skip '^'
while(*c == 32)
++c; /* skip any blanks, as in (lim u) ^ (lim v) --but that won't work anyway. */
if (isdigit(c[0]) ||
(c[0] == '-' && isdigit(c[1])) ||
(c[0] == '(' && isdigit(c[1])) ||
(c[0] == '(' && c[1] == '-' && isdigit(c[2]))
)
strcpy(next, "<tspan style=\"baseline-shift:super;font-size:8pt;dx=1;\">");
else
strcpy(next, "<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">");
// so x^22 will work but x^(22) won't work
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
if(*c == '-') // negative exponent
{ *next = '-';
++next;
++c;
++*width;
++byteswritten;
if (isalpha(c[0]))
{ // accept a^-n
*next = c[0];
++next;
++c;
c+= k;
strcpy(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont;
goto start;
}
}
if (isdigit(c[0])) // integer exponent
{
while(isdigit(c[k]))
{ *next = c[k];
++k;
++next;
++*width;
--outbuffersize;
}
c+= k;
strcpy(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont;
goto start;
}
if (isalpha(c[0]) || c[0] == '?')
{ // just one letter will go into the exponent.
// Thus x^n will work, but x^ab will be x^a b
// also x^c won't work with c a UTF-8 "character"
*next = c[0];
++*width;
++next;
++c;
c+= k;
strcpy(next,"</tspan>\n");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont; // done with the exponent
goto start;
}
if(c[0] == '(')
{
++c; // pass the open paren without copying it
// following code accepts a^(-n) but also incorrectly a^(- sin x)
// so we need to check function names don't occur in exponents
// or fix this code
byteswritten = 0;
if(*c == '-')
{ *next = *c;
++*width;
++c;
++next;
++byteswritten;
}
/// accept ^(1/2) using a UTF character for 1/2
if(c[0] == '1' && c[1] == '/' && c[2] == '2' && c[3] == ')')
{ c += 4;
++ *width;
// we already wrote the opening <tspan>
strcpy(next,"\xC2\xBD"); // UTF-8 for 1/2
strcat(next,"</tspan>\n");
byteswritten = (int) strlen(next);
next += byteswritten;
fontsize = bigfont;
goto start;
}
if(*c == 'i')
{ buffer[0] = 'i';
buffer[1] = '\0';
++c;
++ *width;
// accept e^(i \\theta) and e^(i \\pi) and e^(it)
// change to italics
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">");
if(*c == '\\')
{ c++;
char temp[12];
k = 0;
while(isalpha(*c))
{ buffer[k]= *c;
++k;
++c;
}
buffer[k] = '\0';
strcpy(temp,TeXtoUTF8(buffer));
if(strlen(temp) == 0)
assert(0); // TeXtoUTF8 failed
strcpy(next,temp);
++*width;
byteswritten = (int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
byteswritten = 0;
}
}
while(isdigit(*c) || *c == '/' || *c == ' ') // accept 2^(5/2)
{ *next = *c;
++*width;
++c;
++next;
++byteswritten;
}
outbuffersize -= byteswritten;
if(*c == '\\')
{ ++c; // pass the backslash;
int k;
for( k = 0; isalpha(c[k]) && k < 10; k++)
buffer[k] = c[k];
buffer[k] = '\0';
if(!strcmp(buffer,"onehalf") ||
!strcmp(buffer,"sqrt") ||
!strcmp(buffer,"onequarter") ||
!strcmp(buffer,"infty") ||
!strcmp(buffer,"theta") || // in re^(i\\theta)
!strcmp(buffer, "lim") || // in ...^\\lim v
!strcmp(buffer, "log") ||
!strcmp(buffer, "sin") ||
!strcmp(buffer, "ln")
)
{ char temp[12];
if(!strcmp(buffer,"theta"))
{ // it has be an italic theta
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt\">");
}
if(!strcmp(buffer,"lim") ||
!strcmp(buffer,"log") ||
!strcmp(buffer,"ln") ||
!strcmp(buffer,"sin")
)
{
c += strlen(buffer);
assert(*c == ' ');
while(*c == ' ')
++c;
if(isdigit(*c))
{ int j = (int) strlen(buffer);
buffer[j] = ' ';
buffer[j+1]= *c;
buffer[j+2] = '\0';
++c;
if(*c == '/') // accept e^(\\log 1/x)
{ ++c;
buffer[j+2] = '/';
buffer[j+3] = '\0';
}
}
assert(isalpha(*c));
// what came just before was probably in italics,
// but "lim", "log", "ln" have to be in Roman.
// So we have to
// close the <tspan> and open a new one in Roman
strcpy(next,"</tspan>\n");
strcat(next, "<tspan style=\"baseline-shift:super;font-size:8pt;font-style:roman\" dx=\"1\">");
strcat(next, buffer);
strcat(next, "</tspan>\n");
// Now open up a new span in italics
strcat(next, "<tspan style=\"baseline-shift:super;font-size:8pt;font-style:italic\" dx=\"1\">");
int nchars = (int) strlen(buffer);
buffer[0] = ' ';
buffer[1] = *c;
buffer[2] = '\0';
++c;
strcat(next,buffer);
byteswritten = (int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
byteswritten = 0;
*width += nchars + 2; // lim v is five characters
}
else
{
strcpy(temp,TeXtoUTF8(buffer));
strcpy(next,temp);
++*width;
byteswritten = (int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
byteswritten = 0;
c+= k;
}
}
}
// We must also handle x^(2n) and x^(2n+1)
if(isalpha(*c) && c[1] == ')')
{ // x^(2n), or x^(-n), with *c now equal to n
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">%c", *c);
++*width;
byteswritten = (int) strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
++c; // pass the n
assert(*c == ')');
}
else if(isalpha(*c) &&
(c[1] == '+' || c[1] == '-') &&
isdigit(c[2]) &&
c[3] == ')')
{ // x^(2n+1) or x^(2n-1) are allowed, but not with spaces,
// and not with \pm in place of + or -.
// we've already written the 2 in roman,
// but we haven't closed the span.
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">%c</tspan>\n<tspan style=\"baseline-shift:super;font-style:roman;font-size:8pt;\">%c%c",c[0],c[1],c[2]);
byteswritten = (int)strlen(next);
next += byteswritten;
*width += 3;
outbuffersize -= byteswritten;
c += 3;
assert(*c == ')');
}
else if(c[0] == 'l' && c[1] == 'n' && c[2] == ')')
{ // as in ^(ln)
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:roman;font-size:8pt;\">%c%c%c%c",' ',c[0],c[1],' ');
byteswritten = (int)strlen(next);
next += byteswritten;
*width += 4;
outbuffersize -= byteswritten;
c += 2;
assert(*c == ')');
}
else if ((isalpha(c[0]) || c[0] == '?') && isalpha(c[1]) && c[2] == ')')
{ // x^(ab) or x^(2ab) or x^(?n)
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">%c%c",c[0],c[1]);
byteswritten = (int)strlen(next);
next += byteswritten;
*width += 2;
outbuffersize -= byteswritten;
c += 2;
assert(*c == ')');
}
else if (isalpha(c[0]) &&
(c[1] == '-' || c[1] == '+') &&
(isalpha(c[2]) ||c[2] == '?')
&& c[3] == ')'
)
// the question mark or '+' will be in italics, which is not ideal, but OK.
{ // x^(n-?)
sprintf(next, "</tspan>\n<tspan style=\"baseline-shift:super;font-style:italic;font-size:8pt;\">%c%c%c",c[0],c[1],c[2]);
byteswritten = (int)strlen(next);
next += byteswritten;
*width += 3;
outbuffersize -= byteswritten;
c += 3;
assert(*c == ')');
}
if(*c != ')')
assert(0);
++c; // pass the close paren without copying it
strcpy(next,"</tspan>\n");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont;
goto start;
}
fontsize = bigfont;
strcpy(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont;
goto start;
}
if(*c == '_')
/* subscript */
{ ++c; // skip '_'
if(*c == '-') // negative subscript; might occur for Bessel functions.
{ *next = '-';
++next;
++c;
++*width;
++byteswritten;
}
else if (isdigit(c[0])) // integer subscript
{
strcpy(next, "<tspan style=\"baseline-shift:sub;font-size:8pt;\">");
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
while(isdigit(c[k]))
{ *next = c[k];
++k;
++next;
++*width;
--outbuffersize;
}
*next = '\0';
c+= k;
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
}
else if (isalpha(c[0])) // letter in subscript
{
strcpy(next, "<tspan style=\"baseline-shift:sub;font-style:italic;font-size:8pt;\">");
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
sprintf(next,"%c",c[0]);
byteswritten = (int) strlen(next);
next += byteswritten;
++c;
++ *width;
-- outbuffersize;
}
else if (c[0] == '(')
{ /* we have to handle at least a_(2k) and a_(2k+1),
which will take several <tspan> elements, because the
2 and the + are in roman, while k is in italics.
Also we need J_(n-1) for Bessel functions
*/
if(isdigit(c[1]) && isalpha(c[2])) // _(2n...
{ // put the 2 in roman and the n in italics
strcpy(next, "<tspan style=\"baseline-shift:sub;font-style:roman;font-size:8pt;\">");
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
sprintf(next,"%c </tspan>\n",c[1]);
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
strcpy(next, "<tspan style=\"baseline-shift:sub;font-style:italic;font-size:8pt;\">");
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
sprintf(next,"%c </tspan>\n",c[2]);
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
if(c[3] == ')') // the subscript was 2n or similar
{ c += 4; // skip the closing paren
fontsize = bigfont;
goto start;
}
// now it's _(2n+1) or the like
c+=3;
if(*c != '+' && *c != '-')
assert(0); // 2n+1 or 2n-1
if(!isdigit(c[1]) || c[2] != ')')
assert(0);
// put the +1 or -1 in roman
strcpy(next, "<tspan style=\"baseline-shift:sub;font-style:roman;font-size:8pt;\">");
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
sprintf(next,"%c%c </tspan>\n",c[0],c[1]);
byteswritten = (int)strlen(next);
outbuffersize -= byteswritten;
next += byteswritten;
c+=3; // skip the closing paren
fontsize = bigfont;
goto start;
}
if(isalpha(c[1]) &&
(c[2] == '-' || c[2] == '+') &&
isdigit(c[3])
)
{ // for example J_(n-1)
strcpy(next, "<tspan style=\"baseline-shift:sub;font-style:italic;font-size:8pt;\">");
char temp[12];
temp[0] = c[1];
temp[1] = '\0';
strcat(next,temp);
// close the italic tspan and open a roman one
strcat(next, "</tspan>\n<tspan style=\"baseline-shift:sub;font-style:roman;font-size:8pt;\">");
temp[0] = c[2];
temp[1] = c[3];
temp[3] = '\0';
strcat(next,temp);
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
c += 5;
}
}
else
assert(0); // no more complicated subscripts are allowed.
// Greek or TeX subscripts not supported
// double subscripts not supported
strcpy(next,"</tspan>");
byteswritten = (int)strlen(next);
next += byteswritten;
outbuffersize -= byteswritten;
fontsize = bigfont;
goto start;
}
printf("oops, couldn't handle %s\n",c);
printf("had to stop at %s\n",c);
printf("That might happen if svgSymbolText was called with garbage input.\n");
assert(0); // all legal cases are covered.
return 1;
}
// Sindbad File Manager Version 1.0, Coded By Sindbad EG ~ The Terrorists