/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Novell code. * * The Initial Developer of the Original Code is Novell Corporation. * Portions created by the Initial Developer are Copyright (C) 2006 * the Initial Developer. All Rights Reserved. * * Contributor(s): * robert@ocallahan.org * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #include "nsTextRunTransformations.h" #include "nsTextFrameUtils.h" #include "gfxSkipChars.h" #include "nsGkAtoms.h" #include "nsStyleConsts.h" #include "nsStyleContext.h" #include "gfxContext.h" #include "nsContentUtils.h" #include "nsUnicharUtils.h" #include "nsUnicodeProperties.h" #include "nsSpecialCasingData.h" // Unicode characters needing special casing treatment in tr/az languages #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131 // Greek sigma needs custom handling for the lowercase transform; for details // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120. #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 #define GREEK_SMALL_LETTER_SIGMA 0x03C3 nsTransformedTextRun * nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams, nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup, const PRUnichar* aString, PRUint32 aLength, const PRUint32 aFlags, nsStyleContext** aStyles, bool aOwnsFactory) { NS_ASSERTION(!(aFlags & gfxTextRunFactory::TEXT_IS_8BIT), "didn't expect text to be marked as 8-bit here"); void *storage = AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength); if (!storage) { return nsnull; } return new (storage) nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength, aFlags, aStyles, aOwnsFactory); } void nsTransformedTextRun::SetCapitalization(PRUint32 aStart, PRUint32 aLength, bool* aCapitalization, gfxContext* aRefContext) { if (mCapitalize.IsEmpty()) { if (!mCapitalize.AppendElements(GetLength())) return; memset(mCapitalize.Elements(), 0, GetLength()*sizeof(bool)); } memcpy(mCapitalize.Elements() + aStart, aCapitalization, aLength*sizeof(bool)); mNeedsRebuild = true; } bool nsTransformedTextRun::SetPotentialLineBreaks(PRUint32 aStart, PRUint32 aLength, PRUint8* aBreakBefore, gfxContext* aRefContext) { bool changed = gfxTextRun::SetPotentialLineBreaks(aStart, aLength, aBreakBefore, aRefContext); if (changed) { mNeedsRebuild = true; } return changed; } size_t nsTransformedTextRun::SizeOfExcludingThis(nsMallocSizeOfFun aMallocSizeOf) { size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf); total += mStyles.SizeOfExcludingThis(aMallocSizeOf); total += mCapitalize.SizeOfExcludingThis(aMallocSizeOf); if (mOwnsFactory) { total += aMallocSizeOf(mFactory); } return total; } size_t nsTransformedTextRun::SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf) { return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf); } nsTransformedTextRun* nsTransformingTextRunFactory::MakeTextRun(const PRUnichar* aString, PRUint32 aLength, const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, PRUint32 aFlags, nsStyleContext** aStyles, bool aOwnsFactory) { return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString, aLength, aFlags, aStyles, aOwnsFactory); } nsTransformedTextRun* nsTransformingTextRunFactory::MakeTextRun(const PRUint8* aString, PRUint32 aLength, const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup, PRUint32 aFlags, nsStyleContext** aStyles, bool aOwnsFactory) { // We'll only have a Unicode code path to minimize the amount of code needed // for these rarely used features NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast(aString), aLength); return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup, aFlags & ~(gfxFontGroup::TEXT_IS_PERSISTENT | gfxFontGroup::TEXT_IS_8BIT), aStyles, aOwnsFactory); } /** * Copy a given textrun, but merge certain characters into a single logical * character. Glyphs for a character are added to the glyph list for the previous * character and then the merged character is eliminated. Visually the results * are identical. * * This is used for text-transform:uppercase when we encounter a SZLIG, * whose uppercase form is "SS", or other ligature or precomposed form * that expands to multiple codepoints during case transformation. * * This function is unable to merge characters when they occur in different * glyph runs. This only happens in tricky edge cases where a character was * decomposed by case-mapping (e.g. there's no precomposed uppercase version * of an accented lowercase letter), and then font-matching caused the * diacritics to be assigned to a different font than the base character. * In this situation, the diacritic(s) get discarded, which is less than * ideal, but they probably weren't going to render very well anyway. * Bug 543200 will improve this by making font-matching operate on entire * clusters instead of individual codepoints. * * For simplicity, this produces a textrun containing all DetailedGlyphs, * no simple glyphs. So don't call it unless you really have merging to do. * * @param aCharsToMerge when aCharsToMerge[i] is true, this character is * merged into the previous character */ static void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc, bool* aCharsToMerge) { aDest->ResetGlyphRuns(); gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength()); PRUint32 offset = 0; nsAutoTArray glyphs; while (iter.NextRun()) { gfxTextRun::GlyphRun* run = iter.GetGlyphRun(); nsresult rv = aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false); if (NS_FAILED(rv)) return; bool anyMissing = false; PRUint32 mergeRunStart = iter.GetStringStart(); const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs(); gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart]; PRUint32 stringEnd = iter.GetStringEnd(); for (PRUint32 k = iter.GetStringStart(); k < stringEnd; ++k) { const gfxTextRun::CompressedGlyph g = srcGlyphs[k]; if (g.IsSimpleGlyph()) { if (!anyMissing) { gfxTextRun::DetailedGlyph details; details.mGlyphID = g.GetSimpleGlyph(); details.mAdvance = g.GetSimpleAdvance(); details.mXOffset = 0; details.mYOffset = 0; glyphs.AppendElement(details); } } else { if (g.IsMissing()) { anyMissing = true; glyphs.Clear(); } if (g.GetGlyphCount() > 0) { glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount()); } } if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) { // next char is supposed to merge with current, so loop without // writing current merged glyph to the destination continue; } // If the start of the merge run is actually a character that should // have been merged with the previous character (this can happen // if there's a font change in the middle of a case-mapped character, // that decomposed into a sequence of base+diacritics, for example), // just discard the entire merge run. See comment at start of this // function. NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart], "unable to merge across a glyph run boundary, " "glyph(s) discarded"); if (!aCharsToMerge[mergeRunStart]) { if (anyMissing) { mergedGlyph.SetMissing(glyphs.Length()); } else { mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(), mergedGlyph.IsLigatureGroupStart(), glyphs.Length()); } aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements()); ++offset; } glyphs.Clear(); anyMissing = false; mergeRunStart = k + 1; if (mergeRunStart < stringEnd) { mergedGlyph = srcGlyphs[mergeRunStart]; } } NS_ASSERTION(glyphs.Length() == 0, "Leftover glyphs, don't request merging of the last character with its next!"); } NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations"); } static gfxTextRunFactory::Parameters GetParametersForInner(nsTransformedTextRun* aTextRun, PRUint32* aFlags, gfxContext* aRefContext) { gfxTextRunFactory::Parameters params = { aRefContext, nsnull, nsnull, nsnull, 0, aTextRun->GetAppUnitsPerDevUnit() }; *aFlags = aTextRun->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT; return params; } void nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); gfxFontStyle fontStyle = *fontGroup->GetStyle(); fontStyle.size *= 0.8; nsRefPtr smallFont = fontGroup->Copy(&fontStyle); if (!smallFont) return; PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr* styles = aTextRun->mStyles.Elements(); // Create a textrun so we can check cluster-start properties nsAutoPtr inner(fontGroup->MakeTextRun(str, length, &innerParams, flags)); if (!inner.get()) return; nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true); aTextRun->ResetGlyphRuns(); PRUint32 runStart = 0; bool runIsLowercase = false; nsAutoTArray styleArray; nsAutoTArray canBreakBeforeArray; PRUint32 i; for (i = 0; i <= length; ++i) { bool isLowercase = false; if (i < length) { // Characters that aren't the start of a cluster are ignored here. They // get added to whatever lowercase/non-lowercase run we're in. if (!inner->IsClusterStart(i)) { isLowercase = runIsLowercase; } else { if (styles[i]->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) { PRUint32 ch = str[i]; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } PRUint32 ch2 = ToUpperCase(ch); isLowercase = ch != ch2 || mozilla::unicode::SpecialUpper(ch); } else { // Don't transform the character! I.e., pretend that it's not lowercase } } } if ((i == length || runIsLowercase != isLowercase) && runStart < i) { nsAutoPtr transformedChild; nsAutoPtr cachedChild; gfxTextRun* child; if (runIsLowercase) { transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart, &innerParams, smallFont, flags, styleArray.Elements(), false); child = transformedChild; } else { cachedChild = fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart, "lost some break-before values?"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart); runStart = i; styleArray.Clear(); canBreakBeforeArray.Clear(); } if (i < length) { runIsLowercase = isLowercase; styleArray.AppendElement(styles[i]); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); } } } void nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun, gfxContext* aRefContext) { PRUint32 length = aTextRun->GetLength(); const PRUnichar* str = aTextRun->mString.BeginReading(); nsRefPtr* styles = aTextRun->mStyles.Elements(); nsAutoString convertedString; nsAutoTArray charsToMergeArray; nsAutoTArray styleArray; nsAutoTArray canBreakBeforeArray; PRUint32 extraCharsCount = 0; // Some languages have special casing conventions that differ from the // default Unicode mappings. // The enum values here are named for well-known exemplar languages that // exhibit the behavior in question; multiple lang tags may map to the // same setting here, if the behavior is shared by other languages. enum { eNone, // default non-lang-specific behavior eTurkish, // preserve dotted/dotless-i distinction in uppercase eDutch // treat "ij" digraph as a unit for capitalization } languageSpecificCasing = eNone; const nsIAtom* lang = nsnull; bool capitalizeDutchIJ = false; bool prevIsLetter = false; PRUint32 sigmaIndex = PRUint32(-1); nsIUGenCategory::nsUGenCategory cat; PRUint32 i; for (i = 0; i < length; ++i) { PRUint32 ch = str[i]; nsStyleContext* styleContext = styles[i]; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i)); PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : styleContext->GetStyleText()->mTextTransform; int extraChars = 0; const mozilla::unicode::MultiCharMapping *mcm; if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) { ch = SURROGATE_TO_UCS4(ch, str[i + 1]); } if (lang != styleContext->GetStyleFont()->mLanguage) { lang = styleContext->GetStyleFont()->mLanguage; if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az || lang == nsGkAtoms::ba || lang == nsGkAtoms::crh || lang == nsGkAtoms::tt) { languageSpecificCasing = eTurkish; } else if (lang == nsGkAtoms::nl) { languageSpecificCasing = eDutch; } else { languageSpecificCasing = eNone; } } switch (style) { case NS_STYLE_TEXT_TRANSFORM_LOWERCASE: if (languageSpecificCasing == eTurkish) { if (ch == 'I') { ch = LATIN_SMALL_LETTER_DOTLESS_I; prevIsLetter = true; sigmaIndex = PRUint32(-1); break; } if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { ch = 'i'; prevIsLetter = true; sigmaIndex = PRUint32(-1); break; } } // Special lowercasing behavior for Greek Sigma: note that this is listed // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a // language-specific mapping; it applies regardless of the language of // the element. // // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e. // the non-final form) whenever there is a following letter, or when the // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a // LETTER); and to FINAL SIGMA when it is preceded by another letter but // not followed by one. // // To implement the context-sensitive nature of this mapping, we keep // track of whether the previous character was a letter. If not, CAPITAL // SIGMA will map directly to SMALL SIGMA. If the previous character // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the // position in the converted string; if we then encounter another letter, // that FINAL SIGMA is replaced with a standard SMALL SIGMA. cat = mozilla::unicode::GetGenCategory(ch); // If sigmaIndex is not -1, it marks where we have provisionally mapped // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we // need to change it to SMALL SIGMA. if (sigmaIndex != PRUint32(-1)) { if (cat == nsIUGenCategory::kLetter) { convertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex); } } if (ch == GREEK_CAPITAL_LETTER_SIGMA) { // If preceding char was a letter, map to FINAL instead of SMALL, // and note where it occurred by setting sigmaIndex; we'll change it // to standard SMALL SIGMA later if another letter follows if (prevIsLetter) { ch = GREEK_SMALL_LETTER_FINAL_SIGMA; sigmaIndex = convertedString.Length(); } else { // CAPITAL SIGMA not preceded by a letter is unconditionally mapped // to SMALL SIGMA ch = GREEK_SMALL_LETTER_SIGMA; sigmaIndex = PRUint32(-1); } prevIsLetter = true; break; } // ignore diacritics for the purpose of contextual sigma mapping; // otherwise, reset prevIsLetter appropriately and clear the // sigmaIndex marker if (cat != nsIUGenCategory::kMark) { prevIsLetter = (cat == nsIUGenCategory::kLetter); sigmaIndex = PRUint32(-1); } mcm = mozilla::unicode::SpecialLower(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToLowerCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_UPPERCASE: if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } mcm = mozilla::unicode::SpecialUpper(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToUpperCase(ch); break; case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE: if (capitalizeDutchIJ && ch == 'j') { ch = 'J'; capitalizeDutchIJ = false; break; } capitalizeDutchIJ = false; if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) { if (languageSpecificCasing == eTurkish && ch == 'i') { ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE; break; } if (languageSpecificCasing == eDutch && ch == 'i') { ch = 'I'; capitalizeDutchIJ = true; break; } mcm = mozilla::unicode::SpecialTitle(ch); if (mcm) { int j = 0; while (j < 2 && mcm->mMappedChars[j + 1]) { convertedString.Append(mcm->mMappedChars[j]); ++extraChars; ++j; } ch = mcm->mMappedChars[j]; break; } ch = ToTitleCase(ch); } break; default: break; } if (IS_IN_BMP(ch)) { convertedString.Append(ch); } else { convertedString.Append(H_SURROGATE(ch)); convertedString.Append(L_SURROGATE(ch)); i++; charsToMergeArray.AppendElement(false); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); } while (extraChars > 0) { ++extraCharsCount; charsToMergeArray.AppendElement(true); styleArray.AppendElement(styleContext); canBreakBeforeArray.AppendElement(false); --extraChars; } } PRUint32 flags; gfxTextRunFactory::Parameters innerParams = GetParametersForInner(aTextRun, &flags, aRefContext); gfxFontGroup* fontGroup = aTextRun->GetFontGroup(); nsAutoPtr transformedChild; nsAutoPtr cachedChild; gfxTextRun* child; if (mInnerTransformingTextRunFactory) { transformedChild = mInnerTransformingTextRunFactory->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, fontGroup, flags, styleArray.Elements(), false); child = transformedChild.get(); } else { cachedChild = fontGroup->MakeTextRun( convertedString.BeginReading(), convertedString.Length(), &innerParams, flags); child = cachedChild.get(); } if (!child) return; // Copy potential linebreaks into child so they're preserved // (and also child will be shaped appropriately) NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(), "Dropped characters or break-before values somewhere!"); child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(), canBreakBeforeArray.Elements(), aRefContext); if (transformedChild) { transformedChild->FinishSettingProperties(aRefContext); } if (extraCharsCount > 0) { // Now merge multiple characters into one multi-glyph character as required MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements()); } else { // No merging to do, so just copy; this produces a more optimized textrun. // We can't steal the data because the child may be cached and stealing // the data would break the cache. aTextRun->ResetGlyphRuns(); aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0); } }