BibTeX Entry


@inproceedings{CalaisEtAl:FACL25,
  author	= {Calais, Pedro and Franco, Gabriel and Tang, Zilu and Nikas, Themistoklis and Meira Jr., Wagner and Terzi, Evimaria and Crovella, Mark},
  title		= {Disentangling Text and Math in Word Problems: Evidence for the Bidimensional Structure of Large Language Models' Reasoning},
  booktitle	= {Findings of the Association for Computational Linguistics: ACL 2025},
  year		= {2025},
  abstract	= {Do large language models (LLMs) process text and mathematics as a unified skill, or do these components rely on distinct underlying mechanisms? We investigate this question by disentangling the textual interpretation and mathematical solving steps in math word problems drawn from Brazil's largest college entrance exam (ENEM) and GSM8K, a popular grade school-level benchmark. Using the symbolic solver SymPy, we transform word problems into equivalent purely mathematical representations, isolating equation formulation from textual comprehension. Our extended benchmarks enable a structured analysis of LLM performance across these two dimensions. Through empirical evaluations, we find that small-scale LLMs struggle significantly more with text interpretation than with equation solving, with accuracy dropping by a factor of 2 to 7 when solving full word problems compared to their math-only counterparts. Exploratory factor analysis confirms a bidimensional structure in LLM reasoning, where models exhibit distinct proficiencies in textual and mathematical components, underscoring the need for targeted improvements in language comprehension. Through factor analysis, we provide insights into model selection, helping practitioners make informed choices based on computational costs and task requirements.},
  doi		= {TBD},
  URL		= {TBD}
}
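
As an illustration of the transformation described in the abstract, the following minimal Python sketch reduces a word problem to a math-only representation that SymPy can solve. The example problem, variable name, and equation are hypothetical stand-ins for exposition, not items from the paper's ENEM or GSM8K benchmarks.

# Hypothetical example: the word problem "Ana has three times as many
# apples as Bruno; together they have 24. How many does Bruno have?"
# stripped of its textual framing and posed as a pure equation, in the
# spirit of the paper's math-only counterparts.
from sympy import symbols, Eq, solve

b = symbols('b', positive=True)   # Bruno's apple count (the unknown)
equation = Eq(3*b + b, 24)        # math-only representation of the problem
print(solve(equation, b))         # -> [6], i.e., Bruno has 6 apples

Isolating the solving step this way is what lets the paper compare a model's accuracy on the pure equation against its accuracy on the full word problem, attributing the gap to textual interpretation.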