{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e91fd8c7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hello, World!\n" ] } ], "source": [ "print(\"Hello, World!\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "11896305", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 'Half-title Page', 2]\n", "[1, 'Physical/Political Map of The United States', 5]\n", "[1, 'Political Map of The World', 6]\n", "[1, 'Title Page', 7]\n", "[1, 'Copyright', 10]\n", "[1, 'Dedication', 13]\n", "[1, 'Contents', 14]\n", "[1, 'List of Maps, Tables, and Figures', 22]\n", "[1, 'About the Authors', 32]\n", "[1, 'Preface', 34]\n", "[1, 'Resources For Students And Instructors', 54]\n", "[1, 'Chapter 1: Old Worlds and New', 59]\n", "[1, 'An Old World: North America', 63]\n", "[1, 'An Old World: West Africa', 73]\n", "[1, 'An Old World: Western Europe', 75]\n", "[1, 'Contact', 80]\n", "[1, 'The Spanish Empire', 88]\n", "[1, 'The French and Dutch Empires', 108]\n", "[1, 'Chapter Review', 120]\n", "[1, 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 124]\n", "[1, 'England and the Americas', 129]\n", "[1, 'Early English Exploration and Colonization', 138]\n", "[1, 'The Chesapeake', 142]\n", "[1, 'Origins of American Slavery', 150]\n", "[1, 'The New England Way', 157]\n", "[1, 'New Englanders Divided', 169]\n", "[1, 'Religion, Politics, and Freedom', 180]\n", "[1, 'Chapter Review', 188]\n", "[1, 'Chapter 3: Creating Anglo-America, 1660⠍1750', 193]\n", "[1, 'Global Competition and the Expansion of England⠒s Empire', 197]\n", "[1, 'Entrenchment of American Slavery', 206]\n", "[1, 'Colonies in Crisis', 216]\n", "[1, 'The Growth of Colonial America', 223]\n", "[1, 'Social Classes in the British Colonies', 238]\n", "[1, 'North America at Mid-Century', 246]\n", "[1, 'Chapter Review', 249]\n", "[1, 'Chapter 4: Slavery, Freedom, and the Struggle for Empire to 1763', 253]\n", "[1, 'Slavery and Empire', 257]\n", "[1, 'Slave Cultures and Slave Resistance', 274]\n", "[1, 'An Empire of Freedom', 280]\n", "[1, 'The Public Sphere', 285]\n", "[1, 'The Great Awakening', 294]\n", "[1, 'Imperial Rivalries', 298]\n", "[1, 'Battle for the Continent', 306]\n", "[1, 'Chapter Review', 320]\n", "[1, 'Chapter 5: The American Revolution, 1763⠍1783', 325]\n", "[1, 'The Crisis Begins', 329]\n", "[1, 'The Road to Revolution', 339]\n", "[1, 'The Coming of Independence', 345]\n", "[1, 'Securing Independence', 359]\n", "[1, 'Chapter Review', 375]\n", "[1, 'Chapter 6: The Revolution Within', 381]\n", "[1, 'Democratizing Freedom', 384]\n", "[1, 'Toward Religious Toleration', 392]\n", "[1, 'Defining Economic Freedom', 399]\n", "[1, 'The Limits of Liberty', 404]\n", "[1, 'Slavery and the Revolution', 410]\n", "[1, 'Daughters of Liberty', 422]\n", "[1, 'Chapter Review', 432]\n", "[1, 'Chapter 7: Founding a Nation, 1783⠍1791', 435]\n", "[1, 'America Under the Confederation', 439]\n", "[1, 'A New Constitution', 450]\n", "[1, 'The Ratification Debate and the Origin of the Bill of Rights', 460]\n", "[1, '“We the Peopleâ€\\x9d', 472]\n", "[1, 'Chapter Review', 486]\n", "[1, 'Chapter 8: Securing the Republic, 1791⠍1815', 491]\n", "[1, 'Politics in an Age of Passion', 494]\n", "[1, 'The Adams Presidency', 508]\n", "[1, 'Jefferson in Power', 522]\n", "[1, 'The “Second War of Independenceâ€\\x9d', 531]\n", "[1, 'Chapter Review', 542]\n", "[1, 'Chapter 9: The Market Revolution, 1800⠍1840', 548]\n", "[1, 'A New Economy', 552]\n", "[1, 'The Rise of the West', 558]\n", "[1, 'Market Society', 566]\n", "[1, 'The Free Individual', 582]\n", "[1, 'The Limits of Prosperity', 591]\n", "[1, 'Chapter Review', 601]\n", "[1, 'Chapter 10: Democracy in America, 1815⠍1840', 606]\n", "[1, 'The Triumph of Democracy', 610]\n", "[1, 'Nationalism and Its Discontents', 623]\n", "[1, 'Nation, Section, and Party', 630]\n", "[1, 'The Age of Jackson', 639]\n", "[1, 'Indian Removal', 647]\n", "[1, 'The Bank War and After', 657]\n", "[1, 'Chapter Review', 664]\n", "[1, 'Chapter 11: The Peculiar Institution', 669]\n", "[1, 'The Old South', 672]\n", "[1, 'Life Under Slavery', 690]\n", "[1, 'Slave Culture', 704]\n", "[1, 'Resistance to Slavery', 712]\n", "[1, 'Chapter Review', 722]\n", "[1, 'Chapter 12: An Age of Reform, 1820⠍1840', 725]\n", "[1, 'The Reform Impulse', 728]\n", "[1, 'The Crusade Against Slavery', 740]\n", "[1, 'Black and White Abolitionism', 755]\n", "[1, 'The Origins of Feminism', 761]\n", "[1, 'Chapter Review', 775]\n", "[1, 'Chapter 13: A House Divided, 1840⠍1861', 780]\n", "[1, 'Fruits of Manifest Destiny', 783]\n", "[1, 'A Dose of Arsenic', 803]\n", "[1, 'The Rise of the Republican Party', 814]\n", "[1, 'The Emergence of Lincoln', 821]\n", "[1, 'The Impending Crisis', 837]\n", "[1, 'Chapter Review', 844]\n", "[1, 'Chapter 14: A New Birth of Freedom: The Civil War, 1861⠍1865', 849]\n", "[1, 'The First Modern War', 853]\n", "[1, 'The Coming of Emancipation', 864]\n", "[1, 'The Second American Revolution', 876]\n", "[1, 'The Confederate Nation', 891]\n", "[1, 'Turning Points', 900]\n", "[1, 'Rehearsals for Reconstruction and the End of the War', 904]\n", "[1, 'Chapter Review', 912]\n", "[1, 'Chapter 15: “What Is Freedom?â€\\x9d: Reconstruction', 917]\n", "[1, 'The Meaning of Freedom', 921]\n", "[1, 'The Making of Radical Reconstruction', 938]\n", "[1, 'Radical Reconstruction in the South', 956]\n", "[1, 'The Overthrow of Reconstruction', 963]\n", "[1, 'Chapter Review', 972]\n", "[1, 'Chapter 16: America⠒s Gilded Age, 1870⠍1890', 976]\n", "[1, 'The Second Industrial Revolution', 980]\n", "[1, 'Freedom in the Gilded Age', 992]\n", "[1, 'Labor and the Republic', 999]\n", "[1, 'The Transformation of the West', 1009]\n", "[1, 'Politics in a Gilded Age', 1032]\n", "[1, 'Chapter Review', 1039]\n", "[1, 'Chapter 17: Freedom⠒s Boundaries, at Home and Abroad, 1890⠍1900', 1044]\n", "[1, 'The Populist Challenge', 1048]\n", "[1, 'The Segregated South', 1059]\n", "[1, 'Redrawing the Boundaries', 1075]\n", "[1, 'Becoming a World Power', 1082]\n", "[1, 'Chapter Review', 1101]\n", "[1, 'Chapter 18: The Progressive Era, 1900⠍1916', 1106]\n", "[1, 'An Urban Age and a Consumer Society', 1111]\n", "[1, 'Varieties of Progressivism', 1128]\n", "[1, 'The Politics of Progressivism', 1144]\n", "[1, 'The Progressive Presidents', 1158]\n", "[1, 'Chapter Review', 1170]\n", "[1, 'Chapter 19: Safe for Democracy: The United States and World War I', 1176]\n", "[1, 'An Era of Intervention', 1181]\n", "[1, 'America and the Great War', 1189]\n", "[1, 'The War at Home', 1195]\n", "[1, 'Who Is an American?', 1210]\n", "[1, '1919', 1227]\n", "[1, 'Chapter Review', 1239]\n", "[1, 'Chapter 20: From Business Culture to Great Depression: The Twenties, 1920⠍1932', 1244]\n", "[1, 'The Business of America', 1248]\n", "[1, 'Business and Government', 1258]\n", "[1, 'The Birth of Civil Liberties', 1267]\n", "[1, 'The Culture Wars', 1273]\n", "[1, 'The Great Depression', 1290]\n", "[1, 'Chapter Review', 1298]\n", "[1, 'Chapter 21: The New Deal, 1932⠍1940', 1303]\n", "[1, 'The First New Deal', 1308]\n", "[1, 'The Grassroots Revolt', 1321]\n", "[1, 'The Second New Deal', 1328]\n", "[1, 'A Reckoning With Liberty', 1333]\n", "[1, 'The Limits of Change', 1343]\n", "[1, 'A New Conception of America', 1353]\n", "[1, 'Chapter Review', 1362]\n", "[1, 'Chapter 22: Fighting for the Four Freedoms: World War II, 1941⠍1945', 1368]\n", "[1, 'Fighting World War II', 1374]\n", "[1, 'The Home Front', 1386]\n", "[1, 'Visions of Postwar Freedom', 1398]\n", "[1, 'The American Dilemma', 1403]\n", "[1, 'The End of the War', 1424]\n", "[1, 'Chapter Review', 1432]\n", "[1, 'Chapter 23: The United States and the Cold War, 1945⠍1953', 1437]\n", "[1, 'Origins of the Cold War', 1442]\n", "[1, 'The Cold War and the Idea of Freedom', 1456]\n", "[1, 'The Truman Presidency', 1463]\n", "[1, 'The Anticommunist Crusade', 1471]\n", "[1, 'Chapter Review', 1488]\n", "[1, 'Chapter 24: An Affluent Society, 1953⠍1960', 1493]\n", "[1, 'The Golden Age', 1497]\n", "[1, 'The Eisenhower Era', 1519]\n", "[1, 'The Freedom Movement', 1533]\n", "[1, 'The Election of 1960', 1548]\n", "[1, 'Chapter Review', 1552]\n", "[1, 'Chapter 25: The Sixties, 1960⠍1968', 1557]\n", "[1, 'The Civil Rights Revolution', 1561]\n", "[1, 'The Kennedy Years', 1566]\n", "[1, 'Lyndon Johnson⠒s Presidency', 1571]\n", "[1, 'The Changing Black Movement', 1581]\n", "[1, 'Vietnam and the New Left', 1586]\n", "[1, 'The New Movements and the Rights Revolution', 1596]\n", "[1, '1968', 1617]\n", "[1, 'Chapter Review', 1622]\n", "[1, 'Chapter 26: The Conservative Turn, 1969⠍1988', 1628]\n", "[1, 'President Nixon', 1631]\n", "[1, 'Grassroots Rights Movements', 1638]\n", "[1, 'Foreign Policy and Watergate', 1643]\n", "[1, 'The End of the Golden Age', 1654]\n", "[1, 'The Rising Tide of Conservatism', 1667]\n", "[1, 'The Reagan Revolution', 1677]\n", "[1, 'Chapter Review', 1691]\n", "[1, 'Chapter 27: A New World Order, 1989⠍2004', 1696]\n", "[1, 'The Post⠍Cold War World', 1700]\n", "[1, 'Globalization and Its Discontents', 1709]\n", "[1, 'Culture Wars', 1720]\n", "[1, 'Impeachment and the Election of 2000', 1743]\n", "[1, 'The Attacks of September 11', 1747]\n", "[1, 'The War on Terrorism', 1750]\n", "[1, 'An American Empire?', 1754]\n", "[1, 'The Aftermath of September 11 at Home', 1759]\n", "[1, 'Chapter Review', 1764]\n", "[1, 'Chapter 28: A Divided Nation', 1769]\n", "[1, 'The Winds of Change', 1772]\n", "[1, 'The Great Recession', 1780]\n", "[1, 'Obama in Office', 1789]\n", "[1, 'The Obama Presidency', 1798]\n", "[1, 'President Trump', 1807]\n", "[1, '2020: Year of Crisis', 1820]\n", "[1, 'Freedom in the Twenty-First Century', 1831]\n", "[1, 'Chapter Review', 1841]\n", "[1, 'Suggested Reading', 1845]\n", "[1, 'The Declaration of Independence (1776)', 1909]\n", "[1, 'The Constitution of The United States (1787)', 1917]\n", "[1, 'Glossary', 1943]\n", "[1, 'Credits', 2008]\n", "[1, 'Index', 2016]\n" ] } ], "source": [ "import fitz\n", "doc = fitz.open('../data/raw/textbook.pdf')\n", "toc = doc.get_toc()\n", "for item in toc:\n", " print(item) # [level, title, pdf_page]" ] }, { "cell_type": "code", "execution_count": 11, "id": "991dbad2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Sections to process: 204\n", "[1, 'Chapter 1: Old Worlds and New', 59]\n", "[1, 'An Old World: North America', 63]\n", "[1, 'An Old World: West Africa', 73]\n", "[1, 'An Old World: Western Europe', 75]\n", "[1, 'Contact', 80]\n", "[1, 'The Spanish Empire', 88]\n", "[1, 'The French and Dutch Empires', 108]\n", "[1, 'Chapter Review', 120]\n", "[1, 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 124]\n", "[1, 'England and the Americas', 129]\n", "[1, 'Early English Exploration and Colonization', 138]\n", "[1, 'The Chesapeake', 142]\n", "[1, 'Origins of American Slavery', 150]\n", "[1, 'The New England Way', 157]\n", "[1, 'New Englanders Divided', 169]\n", "[1, 'Religion, Politics, and Freedom', 180]\n", "[1, 'Chapter Review', 188]\n", "[1, 'Chapter 3: Creating Anglo-America, 1660⠍1750', 193]\n", "[1, 'Global Competition and the Expansion of England⠒s Empire', 197]\n", "[1, 'Entrenchment of American Slavery', 206]\n", "[1, 'Colonies in Crisis', 216]\n", "[1, 'The Growth of Colonial America', 223]\n", "[1, 'Social Classes in the British Colonies', 238]\n", "[1, 'North America at Mid-Century', 246]\n", "[1, 'Chapter Review', 249]\n", "[1, 'Chapter 4: Slavery, Freedom, and the Struggle for Empire to 1763', 253]\n", "[1, 'Slavery and Empire', 257]\n", "[1, 'Slave Cultures and Slave Resistance', 274]\n", "[1, 'An Empire of Freedom', 280]\n", "[1, 'The Public Sphere', 285]\n", "[1, 'The Great Awakening', 294]\n", "[1, 'Imperial Rivalries', 298]\n", "[1, 'Battle for the Continent', 306]\n", "[1, 'Chapter Review', 320]\n", "[1, 'Chapter 5: The American Revolution, 1763⠍1783', 325]\n", "[1, 'The Crisis Begins', 329]\n", "[1, 'The Road to Revolution', 339]\n", "[1, 'The Coming of Independence', 345]\n", "[1, 'Securing Independence', 359]\n", "[1, 'Chapter Review', 375]\n", "[1, 'Chapter 6: The Revolution Within', 381]\n", "[1, 'Democratizing Freedom', 384]\n", "[1, 'Toward Religious Toleration', 392]\n", "[1, 'Defining Economic Freedom', 399]\n", "[1, 'The Limits of Liberty', 404]\n", "[1, 'Slavery and the Revolution', 410]\n", "[1, 'Daughters of Liberty', 422]\n", "[1, 'Chapter Review', 432]\n", "[1, 'Chapter 7: Founding a Nation, 1783⠍1791', 435]\n", "[1, 'America Under the Confederation', 439]\n", "[1, 'A New Constitution', 450]\n", "[1, 'The Ratification Debate and the Origin of the Bill of Rights', 460]\n", "[1, '“We the Peopleâ€\\x9d', 472]\n", "[1, 'Chapter Review', 486]\n", "[1, 'Chapter 8: Securing the Republic, 1791⠍1815', 491]\n", "[1, 'Politics in an Age of Passion', 494]\n", "[1, 'The Adams Presidency', 508]\n", "[1, 'Jefferson in Power', 522]\n", "[1, 'The “Second War of Independenceâ€\\x9d', 531]\n", "[1, 'Chapter Review', 542]\n", "[1, 'Chapter 9: The Market Revolution, 1800⠍1840', 548]\n", "[1, 'A New Economy', 552]\n", "[1, 'The Rise of the West', 558]\n", "[1, 'Market Society', 566]\n", "[1, 'The Free Individual', 582]\n", "[1, 'The Limits of Prosperity', 591]\n", "[1, 'Chapter Review', 601]\n", "[1, 'Chapter 10: Democracy in America, 1815⠍1840', 606]\n", "[1, 'The Triumph of Democracy', 610]\n", "[1, 'Nationalism and Its Discontents', 623]\n", "[1, 'Nation, Section, and Party', 630]\n", "[1, 'The Age of Jackson', 639]\n", "[1, 'Indian Removal', 647]\n", "[1, 'The Bank War and After', 657]\n", "[1, 'Chapter Review', 664]\n", "[1, 'Chapter 11: The Peculiar Institution', 669]\n", "[1, 'The Old South', 672]\n", "[1, 'Life Under Slavery', 690]\n", "[1, 'Slave Culture', 704]\n", "[1, 'Resistance to Slavery', 712]\n", "[1, 'Chapter Review', 722]\n", "[1, 'Chapter 12: An Age of Reform, 1820⠍1840', 725]\n", "[1, 'The Reform Impulse', 728]\n", "[1, 'The Crusade Against Slavery', 740]\n", "[1, 'Black and White Abolitionism', 755]\n", "[1, 'The Origins of Feminism', 761]\n", "[1, 'Chapter Review', 775]\n", "[1, 'Chapter 13: A House Divided, 1840⠍1861', 780]\n", "[1, 'Fruits of Manifest Destiny', 783]\n", "[1, 'A Dose of Arsenic', 803]\n", "[1, 'The Rise of the Republican Party', 814]\n", "[1, 'The Emergence of Lincoln', 821]\n", "[1, 'The Impending Crisis', 837]\n", "[1, 'Chapter Review', 844]\n", "[1, 'Chapter 14: A New Birth of Freedom: The Civil War, 1861⠍1865', 849]\n", "[1, 'The First Modern War', 853]\n", "[1, 'The Coming of Emancipation', 864]\n", "[1, 'The Second American Revolution', 876]\n", "[1, 'The Confederate Nation', 891]\n", "[1, 'Turning Points', 900]\n", "[1, 'Rehearsals for Reconstruction and the End of the War', 904]\n", "[1, 'Chapter Review', 912]\n", "[1, 'Chapter 15: “What Is Freedom?â€\\x9d: Reconstruction', 917]\n", "[1, 'The Meaning of Freedom', 921]\n", "[1, 'The Making of Radical Reconstruction', 938]\n", "[1, 'Radical Reconstruction in the South', 956]\n", "[1, 'The Overthrow of Reconstruction', 963]\n", "[1, 'Chapter Review', 972]\n", "[1, 'Chapter 16: America⠒s Gilded Age, 1870⠍1890', 976]\n", "[1, 'The Second Industrial Revolution', 980]\n", "[1, 'Freedom in the Gilded Age', 992]\n", "[1, 'Labor and the Republic', 999]\n", "[1, 'The Transformation of the West', 1009]\n", "[1, 'Politics in a Gilded Age', 1032]\n", "[1, 'Chapter Review', 1039]\n", "[1, 'Chapter 17: Freedom⠒s Boundaries, at Home and Abroad, 1890⠍1900', 1044]\n", "[1, 'The Populist Challenge', 1048]\n", "[1, 'The Segregated South', 1059]\n", "[1, 'Redrawing the Boundaries', 1075]\n", "[1, 'Becoming a World Power', 1082]\n", "[1, 'Chapter Review', 1101]\n", "[1, 'Chapter 18: The Progressive Era, 1900⠍1916', 1106]\n", "[1, 'An Urban Age and a Consumer Society', 1111]\n", "[1, 'Varieties of Progressivism', 1128]\n", "[1, 'The Politics of Progressivism', 1144]\n", "[1, 'The Progressive Presidents', 1158]\n", "[1, 'Chapter Review', 1170]\n", "[1, 'Chapter 19: Safe for Democracy: The United States and World War I', 1176]\n", "[1, 'An Era of Intervention', 1181]\n", "[1, 'America and the Great War', 1189]\n", "[1, 'The War at Home', 1195]\n", "[1, 'Who Is an American?', 1210]\n", "[1, '1919', 1227]\n", "[1, 'Chapter Review', 1239]\n", "[1, 'Chapter 20: From Business Culture to Great Depression: The Twenties, 1920⠍1932', 1244]\n", "[1, 'The Business of America', 1248]\n", "[1, 'Business and Government', 1258]\n", "[1, 'The Birth of Civil Liberties', 1267]\n", "[1, 'The Culture Wars', 1273]\n", "[1, 'The Great Depression', 1290]\n", "[1, 'Chapter Review', 1298]\n", "[1, 'Chapter 21: The New Deal, 1932⠍1940', 1303]\n", "[1, 'The First New Deal', 1308]\n", "[1, 'The Grassroots Revolt', 1321]\n", "[1, 'The Second New Deal', 1328]\n", "[1, 'A Reckoning With Liberty', 1333]\n", "[1, 'The Limits of Change', 1343]\n", "[1, 'A New Conception of America', 1353]\n", "[1, 'Chapter Review', 1362]\n", "[1, 'Chapter 22: Fighting for the Four Freedoms: World War II, 1941⠍1945', 1368]\n", "[1, 'Fighting World War II', 1374]\n", "[1, 'The Home Front', 1386]\n", "[1, 'Visions of Postwar Freedom', 1398]\n", "[1, 'The American Dilemma', 1403]\n", "[1, 'The End of the War', 1424]\n", "[1, 'Chapter Review', 1432]\n", "[1, 'Chapter 23: The United States and the Cold War, 1945⠍1953', 1437]\n", "[1, 'Origins of the Cold War', 1442]\n", "[1, 'The Cold War and the Idea of Freedom', 1456]\n", "[1, 'The Truman Presidency', 1463]\n", "[1, 'The Anticommunist Crusade', 1471]\n", "[1, 'Chapter Review', 1488]\n", "[1, 'Chapter 24: An Affluent Society, 1953⠍1960', 1493]\n", "[1, 'The Golden Age', 1497]\n", "[1, 'The Eisenhower Era', 1519]\n", "[1, 'The Freedom Movement', 1533]\n", "[1, 'The Election of 1960', 1548]\n", "[1, 'Chapter Review', 1552]\n", "[1, 'Chapter 25: The Sixties, 1960⠍1968', 1557]\n", "[1, 'The Civil Rights Revolution', 1561]\n", "[1, 'The Kennedy Years', 1566]\n", "[1, 'Lyndon Johnson⠒s Presidency', 1571]\n", "[1, 'The Changing Black Movement', 1581]\n", "[1, 'Vietnam and the New Left', 1586]\n", "[1, 'The New Movements and the Rights Revolution', 1596]\n", "[1, '1968', 1617]\n", "[1, 'Chapter Review', 1622]\n", "[1, 'Chapter 26: The Conservative Turn, 1969⠍1988', 1628]\n", "[1, 'President Nixon', 1631]\n", "[1, 'Grassroots Rights Movements', 1638]\n", "[1, 'Foreign Policy and Watergate', 1643]\n", "[1, 'The End of the Golden Age', 1654]\n", "[1, 'The Rising Tide of Conservatism', 1667]\n", "[1, 'The Reagan Revolution', 1677]\n", "[1, 'Chapter Review', 1691]\n", "[1, 'Chapter 27: A New World Order, 1989⠍2004', 1696]\n", "[1, 'The Post⠍Cold War World', 1700]\n", "[1, 'Globalization and Its Discontents', 1709]\n", "[1, 'Culture Wars', 1720]\n", "[1, 'Impeachment and the Election of 2000', 1743]\n", "[1, 'The Attacks of September 11', 1747]\n", "[1, 'The War on Terrorism', 1750]\n", "[1, 'An American Empire?', 1754]\n", "[1, 'The Aftermath of September 11 at Home', 1759]\n", "[1, 'Chapter Review', 1764]\n", "[1, 'Chapter 28: A Divided Nation', 1769]\n", "[1, 'The Winds of Change', 1772]\n", "[1, 'The Great Recession', 1780]\n", "[1, 'Obama in Office', 1789]\n", "[1, 'The Obama Presidency', 1798]\n", "[1, 'President Trump', 1807]\n", "[1, '2020: Year of Crisis', 1820]\n", "[1, 'Freedom in the Twenty-First Century', 1831]\n", "[1, 'Chapter Review', 1841]\n" ] } ], "source": [ "# Find where Chapter 1 starts and throw away everything before it\n", "start_index = next(i for i, item in enumerate(toc) if 'Chapter 1' in item[1])\n", "chapters_toc = toc[start_index:]\n", "\n", "# Also throw away back matter (Suggested Reading, Glossary, Index etc.)\n", "end_titles = {'Suggested Reading', 'The Declaration of Independence (1776)', \n", " 'The Constitution of The United States (1787)', 'Glossary', \n", " 'Credits', 'Index'}\n", "chapters_toc = [item for item in chapters_toc if item[1] not in end_titles]\n", "\n", "print(f\"Sections to process: {len(chapters_toc)}\")\n", "for item in chapters_toc:\n", " print(item)" ] }, { "cell_type": "code", "execution_count": 12, "id": "43e20197", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'title': 'Chapter 1: Old Worlds and New', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': True, 'start_pdf': 58, 'end_pdf': 61}\n", "{'title': 'An Old World: North America', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 62, 'end_pdf': 71}\n", "{'title': 'An Old World: West Africa', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 72, 'end_pdf': 73}\n", "{'title': 'An Old World: Western Europe', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 74, 'end_pdf': 78}\n", "{'title': 'Contact', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 79, 'end_pdf': 86}\n", "{'title': 'The Spanish Empire', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 87, 'end_pdf': 106}\n", "{'title': 'The French and Dutch Empires', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 107, 'end_pdf': 118}\n", "{'title': 'Chapter Review', 'chapter_num': 1, 'chapter_title': 'Chapter 1: Old Worlds and New', 'is_chapter_header': False, 'start_pdf': 119, 'end_pdf': 122}\n", "{'title': 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 'chapter_num': 2, 'chapter_title': 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 'is_chapter_header': True, 'start_pdf': 123, 'end_pdf': 127}\n", "{'title': 'England and the Americas', 'chapter_num': 2, 'chapter_title': 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 'is_chapter_header': False, 'start_pdf': 128, 'end_pdf': 136}\n", "{'title': 'Early English Exploration and Colonization', 'chapter_num': 2, 'chapter_title': 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 'is_chapter_header': False, 'start_pdf': 137, 'end_pdf': 140}\n", "{'title': 'The Chesapeake', 'chapter_num': 2, 'chapter_title': 'Chapter 2: European Colonies and Native Nations, 1600⠍1660', 'is_chapter_header': False, 'start_pdf': 141, 'end_pdf': 148}\n" ] } ], "source": [ "import re\n", "\n", "def parse_chapter_num(title):\n", " match = re.match(r'Chapter (\\d+):', title)\n", " return int(match.group(1)) if match else None\n", "\n", "structured = []\n", "current_chapter_num = None\n", "current_chapter_title = None\n", "\n", "for i, item in enumerate(chapters_toc):\n", " title = item[1]\n", " start_pdf = item[2] - 1 # 0-indexed\n", " end_pdf = (chapters_toc[i + 1][2] - 2) if i + 1 < len(chapters_toc) else doc.page_count - 1\n", "\n", " chapter_num = parse_chapter_num(title)\n", "\n", " if chapter_num:\n", " # This entry IS a chapter\n", " current_chapter_num = chapter_num\n", " current_chapter_title = title\n", " is_chapter_header = True\n", " else:\n", " is_chapter_header = False\n", "\n", " structured.append({\n", " \"title\": title,\n", " \"chapter_num\": current_chapter_num,\n", " \"chapter_title\": current_chapter_title,\n", " \"is_chapter_header\": is_chapter_header,\n", " \"start_pdf\": start_pdf,\n", " \"end_pdf\": end_pdf,\n", " })\n", "\n", "# Sanity check\n", "for s in structured[:12]:\n", " print(s)" ] }, { "cell_type": "code", "execution_count": 13, "id": "149bc714", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 'Half-title Page', 2, {'kind': 1, 'xref': 63504, 'page': 1, 'to': Point(76.47846, 86.92822), 'zoom': 0.0}]\n", "[1, 'Physical/Political Map of The United States', 5, {'kind': 1, 'xref': 63507, 'page': 4, 'to': Point(76.18479, 90.134159), 'zoom': 0.0}]\n", "[1, 'Political Map of The World', 6, {'kind': 1, 'xref': 63509, 'page': 5, 'to': Point(76.18479, 90.134159), 'zoom': 0.0}]\n", "[1, 'Title Page', 7, {'kind': 1, 'xref': 63511, 'page': 6, 'to': Point(76.47846, 86.92822), 'zoom': 0.0}]\n", "[1, 'Copyright', 10, {'kind': 1, 'xref': 63513, 'page': 9, 'to': Point(76.5, 87.0), 'zoom': 0.0}]\n", "[1, 'Dedication', 13, {'kind': 1, 'xref': 63515, 'page': 12, 'to': Point(76.5, 87.0), 'zoom': 0.0}]\n", "[1, 'Contents', 14, {'kind': 1, 'xref': 63517, 'page': 13, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n", "[1, 'List of Maps, Tables, and Figures', 22, {'kind': 1, 'xref': 63519, 'page': 21, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n", "[1, 'About the Authors', 32, {'kind': 1, 'xref': 63521, 'page': 31, 'to': Point(76.47846, 91.40668), 'zoom': 0.0}]\n", "[1, 'Preface', 34, {'kind': 1, 'xref': 63523, 'page': 33, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n", "[1, 'Resources For Students And Instructors', 54, {'kind': 1, 'xref': 63525, 'page': 53, 'to': Point(76.18479, 90.134159), 'zoom': 0.0}]\n", "[1, 'Chapter 1: Old Worlds and New', 59, {'kind': 1, 'xref': 63527, 'page': 58, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n", "[1, 'An Old World: North America', 63, {'kind': 1, 'xref': 63529, 'page': 62, 'to': Point(76.18479, 90.134159), 'zoom': 0.0}]\n", "[1, 'An Old World: West Africa', 73, {'kind': 1, 'xref': 63531, 'page': 72, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n", "[1, 'An Old World: Western Europe', 75, {'kind': 1, 'xref': 63533, 'page': 74, 'to': Point(76.5, 91.5), 'zoom': 0.0}]\n" ] } ], "source": [ "toc_full = doc.get_toc(simple=False)\n", "\n", "for item in toc_full[:15]:\n", " print(item)" ] }, { "cell_type": "code", "execution_count": 15, "id": "c2563864", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generated → /home/keshav/code/apush-rag/config/page_map.yaml\n", "Now open that file and fill in the real page numbers. Leave as null if unknown.\n" ] } ], "source": [ "import yaml\n", "from pathlib import Path\n", "\n", "project_root = Path().resolve().parent\n", "output_path = project_root / \"config\" / \"page_map.yaml\"\n", "\n", "page_map = {\"chapters\": {}}\n", "\n", "for section in structured:\n", " ch_num = section[\"chapter_num\"]\n", " ch_title = section[\"chapter_title\"]\n", " title = section[\"title\"]\n", "\n", " # Initialize chapter entry if first time seeing it\n", " if ch_num not in page_map[\"chapters\"]:\n", " page_map[\"chapters\"][ch_num] = {\n", " \"title\": ch_title,\n", " \"real_page\": None, # ← you fill this in\n", " \"sections\": {}\n", " }\n", "\n", " # Add section with null page — you fill these in\n", " if not section[\"is_chapter_header\"]:\n", " page_map[\"chapters\"][ch_num][\"sections\"][title] = None\n", "\n", "with open(output_path, \"w\") as f:\n", " yaml.dump(page_map, f, allow_unicode=True, sort_keys=False, default_flow_style=False)\n", "\n", "print(f\"Generated → {output_path}\")\n", "print(\"Now open that file and fill in the real page numbers. Leave as null if unknown.\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.14.3" } }, "nbformat": 4, "nbformat_minor": 5 }