Working with the 50-topic model

The video that accompanies this notebook is available at https://ucdavis.box.com/v/sts-205-notebook-8.

In this notebook, we will be doing more work with the 50-topic model we made for the State of the Union Addresses at the end of Notebook 7 and looking at some more ways to explore the results. Make sure that you have copied all functions from Notebook 7 into your functions.R file.

Start by loading packages and sourcing functions.

library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.3     ✓ purrr   0.3.4
✓ tibble  3.0.6     ✓ dplyr   1.0.4
✓ tidyr   1.1.2     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.0
── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(tidytext)
library(textstem)
Loading required package: koRpus.lang.en
Loading required package: koRpus
Loading required package: sylly
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
For information on available language packages for 'koRpus', run

  available.koRpus.lang()

and see ?install.koRpus.lang()


Attaching package: ‘koRpus’

The following object is masked from ‘package:readr’:

    tokenize
library(topicmodels)
#install.packages("wordcloud")
library(wordcloud)
Loading required package: RColorBrewer
source("functions.R")

Now build the sotu data frame and the sotu_dtm document-term matrix just like we did in the last notebook, but also add party to sotu (Democratic, Republican, or Other).

# Build the State of the Union data frame. make_sotu() is not defined in this
# notebook; presumably it comes from the sourced functions.R.
sotu <- make_sotu()

── Column specification ─────────────────────────────────────────────────────────────────────────────
cols(
  year = col_double(),
  pres = col_character(),
  use_last = col_logical()
)
# Add party.
# Lookup vectors of president names exactly as they appear in sotu$pres.
# A president listed in neither vector is labelled "Other".
republicans <- c(
  "Abraham Lincoln", "Ulysses S. Grant", "Rutherford B. Hayes",
  "James Garfield", "Chester A. Arthur", "Benjamin Harrison",
  "William McKinley", "Theodore Roosevelt", "William H. Taft",
  "Warren Harding", "Calvin Coolidge", "Herbert Hoover",
  "Dwight D. Eisenhower", "Richard Nixon", "Gerald R. Ford",
  "Ronald Reagan", "George H.W. Bush", "George W. Bush", "Donald J. Trump"
)
democrats <- c(
  "Andrew Jackson", "Martin van Buren", "James Polk",
  "Franklin Pierce", "James Buchanan", "Grover Cleveland",
  "Woodrow Wilson", "Franklin D. Roosevelt", "Harry S. Truman",
  "John F. Kennedy", "Lyndon B. Johnson", "Jimmy Carter",
  "William J. Clinton", "Barack Obama"
)

# The Republican check comes first, then the Democratic one; everything else
# falls through to "Other".
sotu <- sotu %>%
  mutate(party = case_when(
    pres %in% republicans ~ "Republican",
    pres %in% democrats ~ "Democratic",
    TRUE ~ "Other"
  ))

# Chunk into paragraphs.
# Each address holds its paragraphs in a single string separated by " <p> ".
# Split on that marker and give every paragraph an id of the form
# <President_Name>_<year>_<paragraph number>.
#
# The per-address tibbles are built in a list and bound once at the end:
# calling rbind() inside a loop re-copies all accumulated rows on every
# iteration. seq_len() is used because 1:nrow(sotu) would iterate over
# c(1, 0) if sotu were empty.
sotu_paragraphs <- bind_rows(
  lapply(seq_len(nrow(sotu)), function(i) {
    paragraph_text <- unlist(str_split(sotu$text[i], " <p> "))
    tibble(text = paragraph_text) %>%
      mutate(id = str_c(str_replace_all(sotu$pres[i], " ", "_"),
                        "_", sotu$year[i], "_", seq_along(paragraph_text)))
  })
)

# Tokenize by words and remove stopwords and words with digits.
# The join key is spelled out so the anti-join cannot silently pick up some
# other shared column, and so dplyr does not print its "Joining, by" message.
sotu_words <- sotu_paragraphs %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!str_detect(word, "[:digit:]"))
Joining, by = "word"
# Lemmatize.
# Only the distinct words are lemmatized; the result is a word -> lemma lookup
# table that is joined back onto the full token list below.
sotu_lemmas <- tibble(word = unique(sotu_words$word)) %>%
  mutate(lemma = lemmatize_words(word))

# Cast document-term matrix: documents are paragraph ids, terms are lemmas,
# and the cell values are the per-paragraph lemma counts.
# The join key is explicit for the same reason as in the stopword anti-join,
# and count(id, lemma) replaces group_by(id) %>% count(lemma) so no grouping
# is carried into cast_dtm().
sotu_dtm <- sotu_words %>%
  left_join(sotu_lemmas, by = "word") %>%
  count(id, lemma) %>%
  cast_dtm(id, lemma, n)
Joining, by = "word"

Read in the 50-topic LDA model we made at the end of Notebook 7.

# Load the previously saved 50-topic model from disk rather than refitting it.
sotu_lda_50a1 <- readRDS("sotu_lda_50_a1.RDS")

We will start by using the functions we made last week to explore the topic model. Since we are working with many topics now, we will make png files for each of the graphs.

# Write each exploration plot for sotu_lda_50a1 to its own 15 x 15 inch PNG at
# 100 dpi. topics_5(), topics_years() and hmap() are not defined in this
# notebook; presumably they come from the sourced functions.R.
# Every png() call opens a file device and the matching dev.off() closes it,
# so the three plots must stay in these png()/dev.off() pairs.

# Plot produced by topics_5() for this model.
png("topics.png", height = 15, width = 15, units = "in", res = 100)
  topics_5(sotu_lda_50a1)
dev.off()
# Plot produced by topics_years() for this model.
png("topics_by_year.png", height = 15, width = 15, units = "in", res = 100)
  topics_years(sotu_lda_50a1)
dev.off()
# Plot produced by hmap(); it also takes the document-term matrix and 50
# (presumably the number of topics -- confirm against functions.R).
png("hmap_50_topics.png", height = 15, width = 15, units = "in", res = 100)
  hmap(sotu_lda_50a1, sotu_dtm, 50)
dev.off()

I’m going to try turning alpha down a bit more, this time to 0.5.

# Fit of the 50-topic model with alpha = 0.5, kept for reference. It is
# commented out because the saved model is read back from the RDS file below;
# uncomment both lines to refit and overwrite that file.
# sotu_lda_50a05 <- LDA(sotu_dtm, k = 50, control = list(alpha = 0.5))
# saveRDS(sotu_lda_50a05, "sotu_lda_50_a05.RDS")

# Load the saved alpha = 0.5 model and write the same three plots as for the
# first model, using "_a05" file names so the earlier PNGs are not overwritten.
sotu_lda_50a05 <- readRDS("sotu_lda_50_a05.RDS")
png("topics_a05.png", height = 15, width = 15, units = "in", res = 100)
  topics_5(sotu_lda_50a05)
dev.off()
png("topics_by_year_a05.png", height = 15, width = 15, units = "in", res = 100)
  topics_years(sotu_lda_50a05)
dev.off()
png("hmap_50_topics_a05.png", height = 15, width = 15, units = "in", res = 100)
  hmap(sotu_lda_50a05, sotu_dtm, 50)
dev.off()

Explore topics in detail

To explore the topics in more detail, I’m going to make a word cloud for each one (be sure to create the folder “wordclouds” in your working directory before running this).

# Draw one word cloud per topic and save each as <out_dir>/t<topic>.png
# (3 x 3 inches, 100 dpi).
#
# lda:     fitted topic model accepted by tidy(lda, matrix = "beta").
# out_dir: folder that receives the PNG files; created if it does not exist.
#
# Called for its side effect (files on disk); returns NULL invisibly.
word_clouds <- function(lda, out_dir = "wordclouds") {
  # png() does not create missing folders; without this the device fails with
  # "unable to open file" for every topic.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  beta <- tidy(lda, matrix = "beta")
  for (i in unique(beta$topic)) {
    # Subset once per topic instead of once per wordcloud() argument.
    topic_terms <- beta[beta$topic == i, ]
    png(file.path(out_dir, str_c("t", i, ".png")),
        height = 3, width = 3, units = "in", res = 100)
    # Close the device even if wordcloud() fails, so no graphics device is
    # left open behind an error.
    tryCatch(
      wordcloud(words = topic_terms$term,
                freq = topic_terms$beta,
                max.words = 200),
      finally = dev.off()
    )
  }
  invisible(NULL)
}
word_clouds(sotu_lda_50a05)
Error in dev.off() : 
  QuartzBitmap_Output - unable to open file 'wordclouds/t1.png'

I’m also going to graph the occurrence of each topic over time and by the party of the president who gave the address (create a folder called “chronology” before running this chunk).

# For every topic, plot the share of each year's words assigned to that topic,
# coloured by the party of the president who gave the address, and save the
# plot as <out_dir>/t<topic>.png (3 x 3 inches, 100 dpi).
#
# lda:     fitted topic model accepted by augment().
# dtm:     document-term matrix passed to augment(); its document names must
#          look like <President_Name>_<year>_<paragraph number>. Defaults to
#          the global sotu_dtm, which the function previously used implicitly.
# out_dir: folder that receives the PNG files; created if it does not exist.
#
# Party membership is looked up in the global `democrats` and `republicans`
# vectors; a president in neither is "Other".
# Called for its side effect (files on disk); returns NULL invisibly.
chronology <- function(lda, dtm = sotu_dtm, out_dir = "chronology") {
  # png() does not create missing folders.
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Colours are named so each party keeps its colour in every plot. Unnamed
  # values are assigned by position among the parties present in a plot, so a
  # topic with no "Other" points would draw Republicans in black.
  party_colors <- c(Democratic = "blue", Other = "black", Republican = "red")

  words <- augment(lda, data = dtm) %>%
    mutate(
      # The year is the second-to-last "_"-separated field of the document id.
      year = as.numeric(str_match(document, "_([0-9]+)_[0-9]+$")[, 2]),
      # Drop the trailing _<year>_<paragraph> and restore spaces in the name.
      pres = document %>%
        str_replace("_[0-9]+_[0-9]+$", "") %>%
        str_replace_all("_", " "),
      party = case_when(
        pres %in% democrats ~ "Democratic",
        pres %in% republicans ~ "Republican",
        TRUE ~ "Other"
      )
    ) %>%
    group_by(year, party, .topic) %>%
    summarize(n = sum(count), .groups = "drop") %>%
    # p is the topic's share of all words assigned in that year.
    group_by(year) %>%
    mutate(p = n / sum(n)) %>%
    ungroup()

  for (i in unique(words$.topic)) {
    graph <- ggplot(words[words$.topic == i, ],
                    aes(x = year, y = p, color = party)) +
      geom_point() +
      scale_color_manual(values = party_colors) +
      guides(color = "none") +
      scale_y_continuous(labels = scales::percent) +
      theme_minimal() +
      labs(x = "Year", y = "Percent of Words", title = str_c("Topic ", i))
    png(file.path(out_dir, str_c("t", i, ".png")),
        height = 3, width = 3, units = "in", res = 100)
    # Close the device even if rendering fails.
    tryCatch(print(graph), finally = dev.off())
  }
  invisible(NULL)
}
chronology(sotu_lda_50a05)
`summarise()` has grouped output by 'year', 'party'. You can override using the `.groups` argument.

I’m curious about topic 25 from sotu_lda_50a05. Let’s look at the paragraphs that are most representative of that topic.

# Pull the 25 paragraphs with the highest gamma (per-document topic
# proportion) for topic 25 and attach their text.
# slice_max() returns the rows sorted from highest to lowest gamma, and
# with_ties = FALSE caps the result at 25 rows; the superseded top_n() keeps
# every row tied with the 25th and does not sort.
gamma <- tidy(sotu_lda_50a05, matrix = "gamma") %>%
  filter(topic == 25) %>%
  slice_max(gamma, n = 25, with_ties = FALSE) %>%
  left_join(sotu_paragraphs, by = c("document" = "id"))

# Iterate over the rows actually returned: a hard-coded 1:25 prints NA entries
# when fewer rows come back and silently drops rows when more do.
for (i in seq_len(nrow(gamma))) {
  print(str_c(gamma$document[i], ", Topic ", gamma$topic[i],
              ", gamma = ", gamma$gamma[i]))
  print(gamma$text[i])
}
[1] "Donald_J._Trump_2018_74, Topic 25, gamma = 0.359397052398856"
[1] "Here tonight are two fathers and two mothers:  Evelyn Rodriguez, Freddy Cuevas, Elizabeth Alvarado, and Robert Mickens.  Their two teenage daughters — Kayla Cuevas and Nisa Mickens — were close friends on Long Island.  But in September 2016, on the eve of Nisa’s 16th Birthday, neither of them came home.  These two precious girls were brutally murdered while walking together in their hometown.  Six members of the savage gang MS-13 have been charged with Kayla and Nisa’s murders.  Many of these gang members took advantage of glaring loopholes in our laws to enter the country as unaccompanied alien minors ‑- and wound up in Kayla and Nisa’s high school."
[1] "Andrew_Jackson_1829_48, Topic 25, gamma = 0.331746449913296"
[1] "On an examination of the records of the Treasury I have been forcibly struck with the large amount of public money which appears to be outstanding. Of the sum thus due from individuals to the Government a considerable portion is undoubtedly desperate, and in many instances has probably been rendered so by remissness in the agents charged with its collection. By proper exertions a great part, however, may yet be recovered; and what ever may be the portions respectively belonging to these two classes, it behooves the Government to ascertain the real state of the fact. This can be done only by the prompt adoption of judicious measures for the collection of such as may be made available. It is believed that a very large amount has been lost through the inadequacy of the means provided for the collection of debts due to the public, and that this inadequacy lies chiefly in the want of legal skill habitually and constantly employed in the direction of the agents engaged in the service. It must, I think, be admitted that the supervisory power over suits brought by the public, which is now vested in an accounting officer of the Treasury, not selected with a view to his legal knowledge, and encumbered as he is with numerous other duties, operates unfavorably to the public interest."
[1] "Grover_Cleveland_1893_165, Topic 25, gamma = 0.328830265465335"
[1] "Economy in public expenditure is a duty that can not innocently be neglected by those intrusted with the control of money drawn from the people for public uses. It must be confessed that our apparently endless resources, the familiarity of our people with immense accumulations of wealth, the growing sentiment among them that the expenditure of public money should in some manner be to their immediate and personal advantage, the indirect and almost stealthy manner in which a large part of our taxes is exacted, and a degenerated sense of official accountability have led to growing extravagance in governmental appropriations."
[1] "Andrew_Johnson_1868_32, Topic 25, gamma = 0.325619691395782"
[1] "Specie payments having been resumed by the Government and banks, all notes or bills of paper issued by either of a less denomination than $20 should by law be excluded from circulation, so that the people may have the benefit and convenience of a gold and silver currency which in all their business transactions will be uniform in value at home and abroad. Every man of property or industry, every man who desires to preserve what he honestly possesses or to obtain what he can honestly earn, has a direct interest in maintaining a safe circulating medium--such a medium as shall be real and substantial, not liable to vibrate with opinions, not subject to be blown up or blown down by the breath of speculation, but to be made stable and secure. A disordered currency is one of the greatest political evils. It undermines the virtues necessary for the support of the social system and encourages propensities destructive of its happiness; it wars against industry, frugality, and economy, and it fosters the evil spirits of extravagance and speculation. It has been asserted by one of our profound and most gifted statesmen that--Of all the contrivances for cheating the laboring classes of mankind, none has been more effectual than that which deludes them with paper money. This is the most effectual of inventions to fertilize the rich man's fields by the sweat of the poor man's brow. Ordinary tyranny, oppression, excessive taxation--these bear lightly on the happiness of the mass of the community compared with a fraudulent currency and the robberies committed by depreciated paper. Our own history has recorded for our instruction enough, and more than enough, of the demoralizing tendency, the injustice, and the intolerable oppression on the virtuous and well-disposed of a degraded paper currency authorized by law or in any way countenanced by government. 
It is one of the most successful devices, in times of peace or war, of expansions or revulsions, to accomplish the transfer of all the precious metals from the great mass of the people into the hands of the few, where they are hoarded in secret places or deposited under bolts and bars, while the people are left to endure all the inconvenience, sacrifice, and demoralization resulting from the use of depreciated and worthless paper."
[1] "James_Polk_1845_64, Topic 25, gamma = 0.317705120470799"
[1] "That banks, national or State, could not have been intended to be used as a substitute for the Treasury spoken of in the Constitution as keepers of the public money is manifest from the fact that at that time there was no national bank, and but three or four State banks, of limited Capital, existed in the country. Their employment as depositories was at first resorted to to a limited extent, but with no avowed intention of continuing them permanently in place of the Treasury of the Constitution. When they were afterwards from time to time employed, it was from motives of supposed convenience. Our experience has shown that when banking corporations have been the keepers of the public money, and been thereby made in effect the Treasury, the Government can have no guaranty that it can command the use of its own money for public purposes. The late Bank of the United States proved to be faithless. The State banks which were afterwards employed were faithless. But a few years ago, with millions of public money in their keeping, the Government was brought almost to bankruptcy and the public credit seriously impaired because of their inability or indisposition to pay on demand to the public creditors in the only currency recognized by the Constitution. Their failure occurred in a period of peace, and great inconvenience and loss were suffered by the public from it. Had the country been involved in a foreign war, that inconvenience and loss would have been much greater, and might have resulted in extreme public calamity. The public money should not be mingled with the private funds of banks or individuals or be used for private purposes. When it is placed in banks for safe-keeping, it is in effect loaned to them without interest, and is loaned by them upon interest to the borrowers from them. 
The public money is converted into banking capital, and is used and loaned out for the private profit of bank stockholders, and when called for, as was the case in 1837, it may be in the pockets of the borrowers from the banks instead of being in the public Treasury contemplated by the Constitution. The framers of the Constitution could never have intended that the money paid into the Treasury should be thus converted to private use and placed beyond the control of the Government."
[1] "Ulysses_S._Grant_1873_83, Topic 25, gamma = 0.304600965232851"
[1] "I have become impressed with the belief that the act approved March 2, 1867, entitled \"An act to establish a uniform system of bankruptcy throughout the United States,\" is productive of more evil than good at this time. Many considerations might be urged for its total repeal, but, if this is not considered advisable, I think it will not be seriously questioned that those portions of said act providing for what is called involuntary bankruptcy operate to increase the financial embarrassments of the country. Careful and prudent men very often become involved in debt in the transaction of their business, and though they may possess ample property, if it could be made available for that purpose, to meet all their liabilities, yet, on account of the extraordinary scarcity of money, they may be unable to meet all their pecuniary obligations as they become due, in consequence of which they are liable to be prostrated in their business by proceedings in bankruptcy at the instance of unrelenting creditors. People are now so easily alarmed as to monetary matters that the mere filing of a petition in bankruptcy by an unfriendly creditor will necessarily embarrass, and oftentimes accomplish the financial ruin, of a responsible business man. Those who otherwise might make lawful and just arrangements to relieve themselves from difficulties produced by the present stringency in money are prevented by their constant exposure to attack and disappointment by proceedings against them in bankruptcy, and, besides, the law is made use of in many cases by obdurate creditors to frighten or force debtors into a compliance with their wishes and into acts of injustice to other creditors and to themselves. I recommend that so much of said act as provides for involuntary bankruptcy on account of the suspension of payment be repealed."
[1] "Thomas_Jefferson_1801_18, Topic 25, gamma = 0.303163237851094"
[1] "In our care, too, of the public contributions intrusted to our direction it would be prudent to multiply barriers against their dissipation by appropriating specific sums to every specific purpose susceptible of definition; by disallowing all applications of money varying from the appropriation in object or transcending it in amount; by reducing the undefined field of contingencies and thereby circumscribing discretionary powers over money, and by bringing back to a single department all accountabilities for money, where the examinations may be prompt, efficacious, and uniform."
[1] "Martin_van_Buren_1838_32, Topic 25, gamma = 0.291428387258581"
[1] "The way in which this defalcation was so long concealed and the steps taken to indemnify the United States, as far as practicable, against loss will also be presented to you. The case is one which imperatively claims the attention of Congress and furnishes the strongest motive for the establishment of a more severe and secure system for the safe-keeping and disbursement of the public moneys than any that has heretofore existed."
[1] "Andrew_Johnson_1867_38, Topic 25, gamma = 0.287055946077249"
[1] "These are important facts and show how completely the inferior currency will supersede the better, forcing it from circulation among the masses and causing it to be exported as a mere article of trade, to add to the money capital of foreign lands. They show the necessity of retiring our paper money, that the return of gold and silver to the avenues of trade may be invited and a demand created which will cause the retention at home of at least so much of the productions of our rich and inexhaustible gold-bearing fields as may be sufficient for purposes of circulation. It is unreasonable to expect a return to a sound currency so long as the Government by continuing to issue irredeemable notes fills the channels of circulation with depreciated paper. Notwithstanding a coinage by our mints, since 1849, of $874,000,000, the people are now strangers to the currency which was designed for their use and benefit, and specimens of the precious metals bearing the national device are seldom seen, except when produced to gratify the interest excited by their novelty. If depreciated paper is to be continued as the permanent currency of the country, and all our coin is to become a mere article of traffic and speculation, to the enhancement in price of all that is indispensable to the comfort of the people, it would be wise economy to abolish our mints thus saving the nation the care and expense incident to such establishments, and let all our precious metals be exported in bullion. The time has come, however, when the Government and national banks should be required to take the most efficient steps and make all necessary arrangements for a resumption of specie payments at the earliest practicable period. 
Specie payments having been once resumed by the Government and banks, all notes or bills of paper issued by either of a less denomination than $20 should by law be excluded from circulation, so that the people may have the benefit and convenience of a gold and silver currency which in all their business transactions will be uniform in value at home and abroad. Every man of property or industry, every man who desires to preserve what he honestly possesses or to obtain what he can honestly earn, has a direct interest in maintaining a safe circulating medium--such a medium as shall be real and substantial, not liable to vibrate with opinions, not subject to be blown up or blown down by the breath of speculation, but to be made stable and secure. A disordered currency is one of the greatest political evils. It undermines the virtues necessary for the support of the social system and encourages propensities destructive of its happiness; it wars against industry, frugality, and economy, and it fosters the evil spirits of extravagance and speculation. It has been asserted by one of our profound and most gifted statesmen that--Of all the contrivances for cheating the laboring classes of mankind, none has been more effectual than that which deludes them with paper money. This is the most effectual of inventions to fertilize the rich man's fields by the sweat of the poor man's brow. Ordinary tyranny, oppression, excessive taxation--these bear lightly on the happiness of the mass of the community compared with a fraudulent currency and the robberies committed by depreciated paper. Our own history has recorded for our instruction enough, and more than enough, of the demoralizing tendency, the injustice, and the intolerable oppression on the virtuous and well disposed of a degraded paper currency authorized by law or in any way countenanced by government. 
It is one of the most successful devices, in times of peace or war, expansions or revulsions, to accomplish the transfer of all the precious metals from the great mass of the people into the hands of the few, where they are hoarded in secret places or deposited in strong boxes under bolts and bars, while the people are left to endure all the inconvenience, sacrifice, and demoralization resulting from the use of a depreciated and worthless paper money."
[1] "James_Buchanan_1859_57, Topic 25, gamma = 0.281592914965515"
[1] "We have yet scarcely recovered from the habits of extravagant expenditure produced by our overflowing Treasury during several years prior to the commencement of my Administration. The financial reverses which we have since experienced ought to teach us all to scrutinize our expenditures with the greatest vigilance and to reduce them to the lowest possible point. The Executive Departments of the Government have devoted themselves to the accomplishment of this object with considerable success, as will appear from their different reports and estimates. To these I invite the scrutiny of Congress, for the purpose of reducing them still lower, if this be practicable consistent with the great public interests of the country. In aid of the policy of retrenchment, I pledge myself to examine closely the bills appropriating lands or money, so that if any of these should inadvertently pass both Houses, as must sometimes be the case, I may afford them an opportunity for reconsideration. At the same time, we ought never to forget that true public economy consists not in withholding the means necessary to accomplish important national objects confided to us by the Constitution, but in taking care that the money appropriated for these purposes shall be faithfully and frugally expended."
[1] "Franklin_Pierce_1855_55, Topic 25, gamma = 0.279414701682617"
[1] "I am fully persuaded that it would be difficult to devise a system superior to that by which the fiscal business of the Government is now conducted. Notwithstanding the great number of public agents of collection and disbursement, it is believed that the checks and guards provided, including the requirement of monthly returns, render it scarcely possible for any considerable fraud on the part of those agents or neglect involving hazard of serious public loss to escape detection. I renew, however, the recommendation heretofore made by me of the enactment of a law declaring it felony on the part of public officers to insert false entries in their books of record or account or to make false returns, and also requiring them on the termination of their service to deliver to their successors all books, records, and other objects of a public nature in their custody."
[1] "Franklin_Pierce_1853_75, Topic 25, gamma = 0.27231344563736"
[1] "That wise economy which is as far removed from parsimony as from corrupt and corrupting extravagance; that single regard for the public good which will frown upon all attempts to approach the Treasury with insidious projects of private interest cloaked under public pretexts; that sound fiscal administration which, in the legislative department, guards against the dangerous temptations incident to overflowing revenue, and, in the executive, maintains an unsleeping watchfulness against the tendency of all national expenditure to extravagance, while they are admitted elementary political duties, may, I trust, be deemed as properly adverted to and urged in view of the more impressive sense of that necessity which is directly suggested by the considerations now presented."
[1] "Martin_van_Buren_1840_14, Topic 25, gamma = 0.272065710718007"
[1] "The present sound condition of their finances and the success with which embarrassments in regard to them, at times apparently insurmountable, have been overcome are matters upon which the people and Government of the United States may well congratulate themselves. An overflowing Treasury, however it may be regarded as an evidence of public prosperity, is seldom conducive to the permanent welfare of any people, and experience has demonstrated its incompatibility with the salutary action of political institutions like those of the United States. Our safest reliance for financial efficiency and independence has, on the contrary, been found to consist in ample resources unencumbered with debt, and in this respect the Federal Government occupies a singularly fortunate and truly enviable position."
[1] "Grover_Cleveland_1888_12, Topic 25, gamma = 0.268194221757199"
[1] "Instead of limiting the tribute drawn from our citizens to the necessities of its economical administration, the Government persists in exacting from the substance of the people millions which, unapplied and useless, lie dormant in its Treasury. This flagrant injustice and this breach of faith and obligation add to extortion the danger attending the diversion of the currency of the country from the legitimate channels of business."
[1] "James_Polk_1845_65, Topic 25, gamma = 0.267489380300641"
[1] "Banks which hold the public money are often tempted by a desire of gain to extend their loans, increase their circulation, and thus stimulate, if not produce, a spirit of speculation and extravagance which sooner or later must result in ruin to thousands. If the public money be not permitted to be thus used, but be kept in the Treasure and paid out to the public creditors in gold and silver, the temptation afforded by its deposit with banks to an undue expansion of their business would be checked, while the amount of the constitutional currency left in circulation would be enlarged by its employment in the public collections and disbursements, and the banks themselves would in consequence be found in a safer and sounder condition. At present State banks are employed as depositories, but without adequate regulation of law whereby the public money can be secured against the casualties and excesses, revulsions, suspensions, and defalcations to which from overissues, overtrading, an inordinate desire for gain, or other causes they are constantly exposed. The Secretary of the Treasury has in all cases when it was practicable taken collateral security for the amount which they hold, by the pledge of stocks of the United States or such of the States as were in good credit. Some of the deposit banks have given this description of security and others have declined to do so."
[1] "James_Polk_1845_63, Topic 25, gamma = 0.25913923764247"
[1] "By the Constitution of the United States it is provided that \"no money shall be drawn from the Treasury but in consequence of appropriations made by law.\" A public treasury was undoubtedly contemplated and intended to be created, in which the public money should be kept from the period of collection until needed for public uses. In the collection and disbursement of the public money no agencies have ever been employed by law except such as were appointed by the Government, directly responsible to it and under its control. The safe-keeping of the public money should be confided to a public treasury created by law and under like responsibility and control. It is not to be imagined that the framers of the Constitution could have intended that a treasury should be created as a place of deposit and safe-keeping of the public money which was irresponsible to the Government. The first Congress under the Constitution, by the act of the 2d of September, 1789, \"to establish the Treasury Department,\" provided for the appointment of a Treasurer, and made it his duty \"to receive and keep the moneys of the United States\" and \"at all times to submit to the Secretary of the Treasury and the Comptroller, or either of them, the inspection of the moneys in his hands.\""
[1] "Martin_van_Buren_1839_45, Topic 25, gamma = 0.258682983420575"
[1] "The continued agitation of the question relative to the best mode of keeping and disbursing the public money still injuriously affects the business of the country. The suspension of specie payments in 1837 rendered the use of deposit banks as prescribed by the act of 1836 a source rather of embarrassment than aid, and of necessity placed the custody of most of the public money afterwards collected in charge of the public officers. The new securities for its safety which this required were a principal cause of my convening an extra session of Congress, but in consequence of a disagreement between the two Houses neither then nor at any subsequent period has there been any legislation on the subject. The effort made at the last session to obtain the authority of Congress to punish the use of public money for private purposes as a crime a measure attended under other governments with signal advantage--was also unsuccessful, from diversities of opinion in that body, notwithstanding the anxiety doubtless felt by it to afford every practicable security. The result of this is still to leave the custody of the public money without those safeguards which have been for several years earnestly desired by the Executive, and as the remedy is only to be found in the action of the Legislature it imposes on me the duty of again submitting to you the propriety of passing a law providing for the safe-keeping of the public moneys, and especially to ask that its use for private purposes by any officers intrusted with it may be declared to be a felony, punishable with penalties proportioned to the magnitude of the offense."
[1] "Andrew_Jackson_1830_47, Topic 25, gamma = 0.257843184429305"
[1] "This mode of aiding such works is also in its nature deceptive, and in many cases conducive to improvidence in the administration of the national funds. Appropriations will be obtained with much greater facility and granted with less security to the public interest when the measure is thus disguised than when definite and direct expenditures of money are asked for. The interests of the nation would doubtless be better served by avoiding all such indirect modes of aiding particular objects. In a government like ours more especially should all public acts be, as far as practicable, simple, undisguised, and intelligible, that they may become fit subjects for the approbation to animadversion of the people."
[1] "John_Tyler_1844_29, Topic 25, gamma = 0.253908346609823"
[1] "It must also be a matter of unmingled gratification that under the existing financial system (resting upon the act of 1789 and the resolution of 1816) the currency of the country has attained a state of perfect soundness; and the rates of exchange between different parts of the Union, which in 1841 denoted by their enormous amount the great depreciation and, in fact, worthlessness of the currency in most of the States, are now reduced to little more than the mere expense of transporting specie from place to place and the risk incident to the operation. In a new country like that of the United States, where so many inducements are held out for speculation, the depositories of the surplus revenue, consisting of banks of any description, when it reaches any considerable amount, require the closest vigilance on the part of the Government. All banking institutions, under whatever denomination they may pass, are governed by an almost exclusive regard to the interest of the stockholders. That interest consists in the augmentation of profits in the form of dividends, and a large surplus revenue intrusted to their custody is but too apt to lead to excessive loans and to extravagantly large issues of paper. As a necessary consequence prices are nominally increased and the speculative mania very soon seizes upon the public mind. A fictitious state of prosperity for a season exists, and, in the language of the day, money becomes plenty. Contracts are entered into by individuals resting on this unsubstantial state of things, but the delusion speedily passes away and the country is overrun with an indebtedness so weighty as to overwhelm many and to visit every department of industry with great and ruinous embarrassment. The greatest vigilance becomes necessary on the part of Government to guard against this state of things. 
The depositories must be given distinctly to understand that the favors of the Government will be altogether withdrawn, or substantially diminished, if its revenues shall be regarded as additions to their banking capital or as the foundation of an enlarged circulation."
[1] "Grover_Cleveland_1887_16, Topic 25, gamma = 0.252523397696021"
[1] "Of course it is not expected that unnecessary and extravagant appropriations will be made for the purpose of avoiding the accumulation of an excess of revenue. Such expenditure, besides the demoralization of all just conceptions of public duty which it entails, stimulates a habit of reckless improvidence not in the least consistent with the mission of our people or the high and beneficent purposes of our Government."
[1] "Theodore_Roosevelt_1905_31, Topic 25, gamma = 0.251028402540763"
[1] "I earnestly recommend to Congress the need of economy and to this end of a rigid scrutiny of appropriations. As examples merely, I call your attention to one or two specific matters. All unnecessary offices should be abolished. The Commissioner of the General Land Office recommends the abolishment of the office of Receiver of Public Moneys for the United States Land Office. This will effect a saving of about a quarter of a million dollars a year. As the business of the Nation grows, it is inevitable that there should be from time to time a legitimate increase in the number of officials, and this fact renders it all the more important that when offices become unnecessary they should be abolished. In the public printing also a large saving of public money can be made. There is a constantly growing tendency to publish masses of unimportant information. It is probably not unfair to say that many tens of thousands of volumes are published at which no human being ever looks and for which there is no real demand whatever."
[1] "Martin_van_Buren_1840_26, Topic 25, gamma = 0.243555797462158"
[1] "The consideration that a large public debt affords an apology, and produces in some degree a necessity also, for resorting to a system and extent of taxation which is not only oppressive throughout, but is likewise so apt to lead in the end to the commission of that most odious of all offenses against the principles of republican government, the prostitution of political power, conferred for the general benefit, to the aggrandizement of particular classes and the gratification of individual cupidity, is alone sufficient, independently of the weighty objections which have already been urged, to render its creation and existence the sources of bitter and unappeasable discord. If we add to this its inevitable tendency to produce and foster extravagant expenditures of the public moneys, by which a necessity is created for new loans and new burdens on the people, and, finally, refer to the examples of every government which has existed for proof, how seldom it is that the system, when once adopted and implanted in the policy of a country, has failed to expand itself until public credit was exhausted and the people were no longer able to endure its increasing weight, it seems impossible to resist the conclusion that no benefits resulting from its career, no extent of conquest, no accession of wealth to particular classes, nor any nor all its combined advantages, can counterbalance its ultimate but certain results--a splendid government and an impoverished people."
[1] "Martin_van_Buren_1840_20, Topic 25, gamma = 0.238366042206756"
[1] "The policy of the Federal Government in extinguishing as rapidly as possible the national debt, and subsequently in resisting every temptation to create a new one, deserves to be regarded in the same favorable light. Among the many objections to a national debt, the certain tendency of public securities to concentrate ultimately in the coffers of foreign stockholders is one which is every day gathering strength. Already have the resources of many of the States and the future industry of their citizens been indefinitely mortgaged to the subjects of European Governments to the amount of twelve millions annually to pay the constantly accruing interest on borrowed money--a sum exceeding half the ordinary revenues of the whole United States. The pretext which this relation affords to foreigners to scrutinize the management of our domestic affairs, if not actually to intermeddle with them, presents a subject for earnest attention, not to say of serious alarm. Fortunately, the Federal Government, with the exception of an obligation entered into in behalf of the District of Columbia, which must soon be discharged, is wholly exempt from any such embarrassment. It is also, as is believed, the only Government which, having fully and faithfully paid all its creditors, has also relieved itself entirely from debt. To maintain a distinction so desirable and so honorable to our national character should be an object of earnest solicitude. Never should a free people, if it be possible to avoid it, expose themselves to the necessity of having to treat of the peace, the honor, or the safety of the Republic with the governments of foreign creditors, who, however well disposed they may be to cultivate with us in general friendly relations, are nevertheless by the law of their own condition made hostile to the success and permanency of political institutions like ours. Most humiliating may be the embarrassments consequent upon such a condition. 
Another objection, scarcely less formidable, to the commencement of a new debt is its inevitable tendency to increase in magnitude and to foster national extravagance. He has been an unprofitable observer of events who needs at this day to be admonished of the difficulties which a government habitually dependent on loans to sustain its ordinary expenditures has to encounter in resisting the influences constantly exerted in favor of additional loans; by capitalists, who enrich themselves by government securities for amounts much exceeding the money they actually advance--a prolific source of individual aggrandizement in all borrowing countries; by stockholders, who seek their gains in the rise and fall of public stocks; and by the selfish importunities of applicants for appropriations for works avowedly for the accommodation of the public, but the real objects of which are too frequently the advancement of private interests. The known necessity which so many of the States will be under to impose taxes for the payment of the interest on their debts furnishes an additional and very cogent reason why the Federal Governments should refrain from creating a national debt, by which the people would be exposed to double taxation for a similar object. We possess within ourselves ample resources for every emergency, and we may be quite sure that our citizens in no future exigency will be unwilling to supply the Government with all the means asked for the defense of the country. In time of peace there can, at all events, be no justification for the creation of a permanent debt by the Federal Government. Its limited range of constitutional duties may certainly under such circumstances be performed without such a resort. It has, it is seen, been avoided during four years of greater fiscal difficulties than have existed in a similar period since the adoption of the Constitution, and one also remarkable for the occurrence of extraordinary causes of expenditures."
[1] "Andrew_Jackson_1836_45, Topic 25, gamma = 0.231248844262851"
[1] "As already intimated, my views have undergone a change so far as to be convinced that no alteration of the Constitution in this respect is wise or expedient. The influence of an accumulating surplus upon the credit system of the country, producing dangerous extensions and ruinous contractions, fluctuations in the price of property, rash speculation, idleness, extravagance, and a deterioration of morals, have taught us the important lesson that any transient mischief which may attend the reduction of our revenue to the wants of our Government is to be borne in preference to an over-flowing treasury."
[1] "Andrew_Jackson_1833_46, Topic 25, gamma = 0.23083227852101"
[1] "The public convenience requires that another building should be erected as soon as practicable, and in providing for it it will be advisable to enlarge in some manner the accommodations for the public officers of the several Departments, and to authorize the erection of suitable depositories for the safe-keeping of the public documents and records."

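Looking at these paragraphs might give me a sense of how the “free world” rhetoric of the Cold War originated and what its antecedents were. (Note that topic numbers are not stable from one model fit to the next: the Topic 25 paragraphs printed above come from a fit in which that topic is about public money, debt, and appropriations, so check which topic number corresponds to the theme you want in your own model.)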

We can write a general function to explore any topic in the same way.

# Print the paragraphs most strongly associated with one topic of a fitted
# topic model.
#
# Arguments:
#   lda        - fitted topic model accepted by tidy(lda, matrix = "gamma")
#   t          - number of the topic to explore
#   n_docs     - how many top paragraphs to request from top_n(), and the
#                maximum number of paragraphs printed (default 25)
#   paragraphs - data frame with an `id` column matching the model's document
#                names and a `text` column holding the paragraph text
#                (defaults to the sotu_paragraphs data frame)
#
# For each selected paragraph, prints a header line (document id, topic and
# gamma) followed by the paragraph text. Rows are printed in the order they
# come back from tidy(), not sorted by gamma.
#
# Returns the selected rows invisibly.
explore <- function(lda, t, n_docs = 25, paragraphs = sotu_paragraphs) {
  top_docs <- tidy(lda, matrix = "gamma") %>%
    filter(topic == t) %>%
    top_n(n_docs, gamma) %>%
    left_join(paragraphs, by = c("document" = "id"))

  # Iterate only over rows that exist, so a topic with fewer than n_docs
  # matching paragraphs does not print NA entries. top_n() keeps ties, so the
  # cap at n_docs limits the printed output to the requested number.
  for (i in seq_len(min(n_docs, nrow(top_docs)))) {
    print(str_c(
      top_docs$document[i],
      ", Topic ", top_docs$topic[i],
      ", gamma = ", top_docs$gamma[i]
    ))
    print(top_docs$text[i])
  }

  invisible(top_docs)
}
# Explore topic 42 with the general function. The notebook describes this
# topic as health care, but topic numbering depends on the particular model
# fit: the output recorded below is about relations with other nations in the
# Western Hemisphere.
explore(lda = sotu_lda_50a05, t = 42)
[1] "Andrew_Jackson_1829_61, Topic 42, gamma = 0.20997730244661"
[1] "The condition and ulterior destiny of the Indian tribes within the limits of some of our States have become objects of much interest and importance. It has long been the policy of Government to introduce among them the arts of civilization, in the hope of gradually reclaiming them from a wandering life. This policy has, however, been coupled with another wholly incompatible with its success. Professing a desire to civilize and settle them, we have at the same time lost no opportunity to purchase their lands and thrust them farther into the wilderness. By this means they have not only been kept in a wandering state, but been led to look upon us as unjust and indifferent to their fate. Thus, though lavish in its expenditures upon the subject, Government has constantly defeated its own policy, and the Indians in general, receding farther and farther to the west, have retained their savage habits. A portion, however, of the Southern tribes, having mingled much with the whites and made some progress in the arts of civilized life, have lately attempted to erect an independent government within the limits of Georgia and Alabama. These States, claiming to be the only sovereigns within their territories, extended their laws over the Indians, which induced the latter to call upon the United States for protection."
[1] "Calvin_Coolidge_1924_77, Topic 42, gamma = 0.23696615395757"
[1] "While we are desirous of promoting peace in every quarter of the globe, we have a special interest in the peace of this hemisphere. It is our constant desire that all causes of dispute in this area may be tranquilly and satisfactorily adjusted. Along with our desire for peace is the earnest hope for the increased prosperity of our sister republics of Latin America, and our constant purpose to promote cooperation with them which may be mutually beneficial and always inspired by the most cordial friendships."
[1] "Franklin_D._Roosevelt_1936_5, Topic 42, gamma = 0.221954680288154"
[1] "In the years that have followed, that sentiment has remained the dedication of this Nation. Among the Nations of the great Western Hemisphere the policy of the good neighbor has happily prevailed. At no time in the four and a half centuries of modern civilization in the Americas has there existed--in any year, in any decade, in any generation in all that time--a greater spirit of mutual understanding, of common helpfulness, and of devotion to the ideals of serf-government than exists today in the twenty-one American Republics and their neighbor, the Dominion of Canada. This policy of the good neighbor among the Americas is no longer a hope, no longer an objective remaining to be accomplished. It is a fact, active, present, pertinent and effective. In this achievement, every American Nation takes an understanding part. There is neither war, nor rumor of war, nor desire for war. The inhabitants of this vast area, two hundred and fifty million strong, spreading more than eight thousand miles from the Arctic to the Antarctic, believe in, and propose to follow, the policy of the good neighbor. They wish with all their heart that the rest of the world might do likewise."
[1] "Franklin_D._Roosevelt_1940_32, Topic 42, gamma = 0.231104006673186"
[1] "Twenty-one American Republics, expressing the will of two hundred and fifty million people to preserve peace and freedom in this Hemisphere, are displaying a unanimity of ideals and practical relationships which gives hope that what is being done here can be done on other continents. We in all the Americas are coming to the realization that we can retain our respective nationalities without, at the same time, threatening the national existence of our neighbors."
[1] "George_Washington_1796_27, Topic 42, gamma = 0.242579596438945"
[1] "It has been my constant, sincere, and earnest wish, in conformity with that of our nation, to maintain cordial harmony and a perfectly friendly understanding with that Republic. This wish remains unabated, and I shall persevere in the endeavor to fulfill it to the utmost extent of what shall be consistent with a just and indispensable regard to the rights and honor of our country; nor will I easily cease to cherish the expectation that a spirit of justice, candor, and friendship on the part of the Republic will eventually insure success."
[1] "Grover_Cleveland_1886_28, Topic 42, gamma = 0.275999499751474"
[1] "The weakness of Liberia and the difficulty of maintaining effective sovereignty over its outlying districts have exposed that Republic to encroachment. It can not be forgotten that this distant community is an offshoot of our own system, owing its origin to the associated benevolence of American citizens, whose praiseworthy efforts to create a nucleus of civilization in the Dark Continent have commanded respect and sympathy everywhere, especially in this country. Although a formal protectorate over Liberia is contrary to our traditional policy, the moral right and duty of the United States to assist in all proper ways in the maintenance of its integrity is obvious, and has been consistently announced during nearly half a century. I recommend that in the reorganization of our Navy a small vessel, no longer found adequate to our needs, be presented to Liberia, to be employed by it in the protection of its coastwise revenues."
[1] "Grover_Cleveland_1893_43, Topic 42, gamma = 0.216351231874789"
[1] "The reproduced caravel Santa Maria, built by Spain and sent to the Columbian Exposition, has been presented to the United States in token of amity and in commemoration of the event it was designed to celebrate. I recommend that in accepting this gift Congress make grateful recognition of the sincere friendship which prompted it."
[1] "Grover_Cleveland_1896_20, Topic 42, gamma = 0.248830896333414"
[1] "The correctness of this forecast need be neither affirmed nor denied. The United States has, nevertheless, a character to maintain as a nation, which plainly dictates that right and not might should be the rule of its conduct. Further, though the United States is not a nation to which peace is a necessity, it is in truth the most pacific of powers and desires nothing so much as to live in amity with all the world. Its own ample and diversified domains satisfy all possible longings for territory, preclude all dreams of conquest, and prevent any casting of covetous eyes upon neighboring regions, however attractive. That our conduct toward Spain and her dominions has constituted no exception to this national disposition is made manifest by the course of our Government, not only thus far during the present insurrection, but during the ten years that followed the rising at Yara in 1868. No other great power, it may safely be said, under circumstances of similar perplexity, would have manifested the same restraint and the same patient endurance. It may also be said that this persistent attitude of the United States toward Spain in connection with Cuba unquestionably evinces no slight respect and regard for Spain on the part of the American people. They in truth do not forget her connection with the discovery of the Western Hemisphere, nor do they underestimate the great qualities of the Spanish people nor fail to fully recognize their splendid patriotism and their chivalrous devotion to the national honor."
[1] "Harry_S._Truman_1946_74, Topic 42, gamma = 0.247524813242289"
[1] "9. We believe that the sovereign states of the Western Hemisphere, without interference from outside the Western Hemisphere, must work together as good neighbors in the solution of their common problems."
[1] "James_Polk_1845_37, Topic 42, gamma = 0.225102592185137"
[1] "The rapid extension of our settlements over our territories heretofore unoccupied, the addition of new States to our Confederacy, the expansion of free principles, and our rising greatness as a nation are attracting the attention of the powers of Europe, and lately the doctrine has been broached in some of them of a \"balance of power\" on this continent to check our advancement. The United States, sincerely desirous of preserving relations of good understanding with all nations, can not in silence permit any European interference on the North American continent, and should any such interference be attempted will be ready to resist it at any and all hazards."
[1] "James_Polk_1846_5, Topic 42, gamma = 0.21606454272977"
[1] "It is a source of high satisfaction to know that the relations of the United States with all other nations, with a single exception, are of the most amicable character. Sincerely attached to the policy of peace early adopted and steadily pursued by this Government, I have anxiously desired to cultivate and cherish friendship and commerce with every foreign power. The spirit and habits of the American people are favorable to the maintenance of such international harmony. In adhering to this wise policy, a preliminary and paramount duty obviously consists in the protection of our national interests from encroachment or sacrifice and our national honor from reproach. These must be maintained at any hazard. They admit of no compromise or neglect, and must be scrupulously and constantly guarded. In their vigilant vindication collision and conflict with foreign powers may sometimes become unavoidable. Such has been our scrupulous adherence to the dictates of justice in all our foreign intercourse that, though steadily and rapidly advancing in prosperity and power, we have given no just cause of complaint to any nation and have enjoyed the blessings of peace for more than thirty years. From a policy so sacred to humanity and so salutary in its effects upon our political system we should never be induced voluntarily to depart."
[1] "James_Polk_1847_48, Topic 42, gamma = 0.253416593349905"
[1] "It has never been contemplated by me, as an object of the war, to make a permanent conquest of the Republic of Mexico or to annihilate her separate existence as an independent nation. On the contrary, it has ever been my desire that she should maintain her nationality, and under a good government adapted to her condition be a free, independent, and prosperous Republic. The United States were the first among the nations to recognize her independence, and have always desired to be on terms of amity and good neighborhood with her. This she would not suffer. By her own conduct we have been compelled to engage in the present war. In its prosecution we seek not her overthrow as a nation, but in vindicating our national honor we seek to obtain redress for the wrongs she has done us and indemnity for our just demands against her. We demand an honorable peace, and that peace must bring with it indemnity for the past and security for the future. Hitherto Mexico has refused all accommodation by which such a peace could be obtained."
[1] "James_Polk_1847_54, Topic 42, gamma = 0.246590963984291"
[1] "Mexico is our near neighbor, and her boundaries are coterminous with our own through the whole extent across the North American continent, from ocean to ocean. Both politically and commercially we have the deepest interest in her regeneration and prosperity. Indeed, it is impossible that, with any just regard to our own safety, we can ever become indifferent to her fate."
[1] "Millard_Fillmore_1852_67, Topic 42, gamma = 0.264632675141642"
[1] "But it is now said by some that this policy must be changed. Europe is no longer separated from us by a voyage of months, but steam navigation has brought her within a few days' sail of our shores. We see more of her movements and take a deeper interest in her controversies. Although no one proposes that we should join the fraternity of potentates who have for ages lavished the blood and treasure of their subjects in maintaining \"the balance of power,\" yet it is said that we ought to interfere between contending sovereigns and their subjects for the purpose of overthrowing the monarchies of Europe and establishing in their place republican institutions. It is alleged that we have heretofore pursued a different course from a sense of our weakness, but that now our conscious strength dictates a change of policy, and that it is consequently our duty to mingle in these contests and aid those who are struggling for liberty."
[1] "Millard_Fillmore_1852_68, Topic 42, gamma = 0.254788724971466"
[1] "This is a most seductive but dangerous appeal to the generous sympathies of freemen. Enjoying, as we do, the blessings of a free Government, there is no man who has an American heart that would not rejoice to see these blessings extended to all other nations. We can not witness the struggle between the oppressed and his oppressor anywhere without the deepest sympathy for the former and the most anxious desire for his triumph. Nevertheless, is it prudent or is it wise to involve ourselves in these foreign wars? Is it indeed true that we have heretofore refrained from doing so merely from the degrading motive of a conscious weakness? For the honor of the patriots who have gone before us, I can not admit it. Men of the Revolution, who drew the sword against the oppressions of the mother country and pledged to Heaven \"their lives, their fortunes, and their sacred honor\" to maintain their freedom, could never have been actuated by so unworthy a motive. They knew no weakness or fear where right or duty pointed the way, and it is a libel upon their fair fame for us, while we enjoy the blessings for which they so nobly fought and bled, to insinuate it. The truth is that the course which they pursued was dictated by a stern sense of international justice, by a statesmanlike prudence and a far-seeing wisdom, looking not merely to the present necessities but to the permanent safety and interest of the country. They knew that the world is governed less by sympathy than by reason and force; that it was not possible for this nation to become a \"propagandist\" of free principles without arraying against it the combined powers of Europe, and that the result was more likely to be the overthrow of republican liberty here than its establishment there. History has been written in vain for those who can doubt this. France had no sooner established a republican form of government than she manifested a desire to force its blessings on all the world. 
Her own historian informs us that, hearing of some petty acts of tyranny in a neighboring principality, \"the National Convention declared that she would afford succor and fraternity to all nations who wished to recover their liberty, and she gave it in charge to the executive power to give orders to the generals of the French armies to aid all citizens who might have been or should be oppressed in the cause of liberty.\" Here was the false step which led to her subsequent misfortunes. She soon found herself involved in war with all the rest of Europe. In less than ten years her Government was changed from a republic to an empire, and finally, after shedding rivers of blood, foreign powers restored her exiled dynasty and exhausted Europe sought peace and repose in the unquestioned ascendency of monarchical principles. Let us learn wisdom from her example. Let us remember that revolutions do not always establish freedom. Our own free institutions were not the offspring of our Revolution. They existed before. They were planted in the free charters of self-government under which the English colonies grew up, and our Revolution only freed us from the dominion of a foreign power whose government was at variance with those institutions. But European nations have had no such training for self-government, and every effort to establish it by bloody revolutions has been, and must without that preparation continue to be, a failure. Liberty unregulated by law degenerates into anarchy, which soon becomes the most horrid of all despotisms. Our policy is wisely to govern ourselves, and thereby to set such an example of national justice, prosperity, and true glory as shall teach to all nations the blessings of self-government and the unparalleled enterprise and success of a free people."
[1] "Theodore_Roosevelt_1901_101, Topic 42, gamma = 0.217757176215842"
[1] "Our people intend to abide by the Monroe Doctrine and to insist upon it as the one sure means of securing the peace of the Western Hemisphere. The Navy offers us the only means of making our insistence upon the Monroe Doctrine anything but a subject of derision to whatever nation chooses to disregard it. We desire the peace which comes as of right to the just man armed; not the peace granted on terms of ignominy to the craven and the weakling."
[1] "Theodore_Roosevelt_1904_107, Topic 42, gamma = 0.218457116707234"
[1] "It is not true that the United States feels any land hunger or entertains any projects as regards the other nations of the Western Hemisphere save such as are for their welfare. All that this country desires is to see the neighboring countries stable, orderly, and prosperous. Any country whose people conduct themselves well can count upon our hearty friendship. If a nation shows that it knows how to act with reasonable efficiency and decency in social and political matters, if it keeps order and pays its obligations, it need fear no interference from the United States. Chronic wrongdoing, or an impotence which results in a general loosening of the ties of civilized society, may in America, as elsewhere, ultimately require intervention by some civilized nation, and in the Western Hemisphere the adherence of the United States to the Monroe Doctrine may force the United States, however reluctantly, in flagrant cases of such wrongdoing or impotence, to the exercise of an international police power. If every country washed by the Caribbean Sea would show the progress in stable and just civilization which with the aid of the Platt amendment Cuba has shown since our troops left the island, and which so many of the republics in both Americas are constantly and brilliantly showing, all question of interference by this Nation with their affairs would be at an end. Our interests and those of our southern neighbors are in reality identical. They have great natural riches, and if within their borders the reign of law and justice obtains, prosperity is sure to come to them. While they thus obey the primary laws of civilized society they may rest assured that they will be treated by us in a spirit of cordial and helpful sympathy. 
We would interfere with them only in the last resort, and then only if it became evident that their inability or unwillingness to do justice at home and abroad had violated the rights of the United States or had invited foreign aggression to the detriment of the entire body of American nations. It is a mere truism to say that every nation, whether in America or anywhere else, which desires to maintain its freedom, its independence, must ultimately realize that the right of such independence can not be separated from the responsibility of making good use of it."
[1] "Theodore_Roosevelt_1905_46, Topic 42, gamma = 0.237517961445412"
[1] "One of the most effective instruments for peace is the Monroe Doctrine as it has been and is being gradually developed by this Nation and accepted by other nations. No other policy could have been as efficient in promoting peace in the Western Hemisphere and in giving to each nation thereon the chance to develop along its own lines. If we had refused to apply the doctrine to changing conditions it would now be completely outworn, would not meet any of the needs of the present day, and, indeed, would probably by this time have sunk into complete oblivion. It is useful at home, and is meeting with recognition abroad because we have adapted our application of it to meet the growing and changing needs of the hemisphere. When we announce a policy such as the Monroe Doctrine we thereby commit ourselves to the consequences of the policy, and those consequences from time to time alter. It is out of the question to claim a right and yet shirk the responsibility for its exercise. Not only we, but all American republics who are benefited by the existence of the doctrine, must recognize the obligations each nation is under as regards foreign peoples no less than its duty to insist upon its own rights."
[1] "Theodore_Roosevelt_1905_48, Topic 42, gamma = 0.29444433053494"
[1] "There are certain essential points which must never be forgotten as regards the Monroe Doctrine. In the first place we must as a Nation make it evident that we do not intend to treat it in any shape or way as an excuse for aggrandizement on our part at the expense of the republics to the south. We must recognize the fact that in some South American countries there has been much suspicion lest we should interpret the Monroe Doctrine as in some way inimical to their interests, and we must try to convince all the other nations of this continent once and for all that no just and orderly Government has anything to fear from us. There are certain republics to the south of us which have already reached such a point of stability, order, and prosperity that they themselves, though as yet hardly consciously, are among the guarantors of this doctrine. These republics we now meet not only on a basis of entire equality, but in a spirit of frank and respectful friendship, which we hope is mutual. If all of the republics to the south of us will only grow as those to which I allude have already grown, all need for us to be the especial champions of the doctrine will disappear, for no stable and growing American Republic wishes to see some great non-American military power acquire territory in its neighborhood. All that this country desires is that the other republics on this continent shall be happy and prosperous; and they cannot be happy and prosperous unless they maintain order within their boundaries and behave with a just regard for their obligations toward outsiders. It must be understood that under no circumstances will the United States use the Monroe Doctrine as a cloak for territorial aggression. We desire peace with all the world, but perhaps most of all with the other peoples of the American Continent. There are, of course, limits to the wrongs which any self-respecting nation can endure. 
It is always possible that wrong actions toward this Nation, or toward citizens of this Nation, in some State unable to keep order among its own people, unable to secure justice from outsiders, and unwilling to do justice to those outsiders who treat it well, may result in our having to take action to protect our rights; but such action will not be taken with a view to territorial aggression, and it will be taken at all only with extreme reluctance and when it has become evident that every other resource has been exhausted."
[1] "Ulysses_S._Grant_1870_9, Topic 42, gamma = 0.281596428293005"
[1] "The allied and other Republics of Spanish origin on this continent may see in this fact a new proof of our sincere interest in their welfare, of our desire to see them blessed with good governments, capable of maintaining order and of preserving their respective territorial integrity, and of our sincere wish to extend our own commercial and social relations with them. The time is not probably far distant when, in the natural course of events, the European political connection with this continent will cease. Our policy should be shaped, in view of this probability, so as to ally the commercial interests of the Spanish American States more closely to our own, and thus give the United States all the preeminence and all the advantage which Mr. Monroe, Mr. Adams, and Mr. Clay contemplated when they proposed to join in the congress of Panama."
[1] "Ulysses_S._Grant_1872_37, Topic 42, gamma = 0.226838711777394"
[1] "The law of emancipation, which was passed more than two years since, has remained unexecuted in the absence of regulations for its enforcement. It was but a feeble step toward emancipation, but it was the recognition of right, and was hailed as such, and exhibited Spain in harmony with sentiments of humanity and of justice and in sympathy with the other powers of the Christian and civilized world."
[1] "William_H._Taft_1910_37, Topic 42, gamma = 0.219630348159501"
[1] "During the past year several of our southern sister Republics celebrated the one hundredth anniversary of their independence. In honor of these events, special embassies were sent from this country to Argentina, Chile, and Mexico, where the gracious reception and splendid hospitality extended them manifested the cordial relations and friendship existing between those countries and the United States, relations which I am happy to believe have never before been upon so high a plane and so solid a basis as at present."
[1] "William_H._Taft_1911_76, Topic 42, gamma = 0.224613585234379"
[1] "During the past year the Republic of Venezuela celebrated the one hundredth anniversary of its independence. The United States sent, in honor of this event, a special embassy to Caracas, where the cordial reception and generous hospitality shown it were most gratifying as a further proof of the good relations and friendship existing between that country and the United States. MEXICO."
[1] "Woodrow_Wilson_1915_5, Topic 42, gamma = 0.218304063534951"
[1] "There was a time in the early days of our own great nation and of the republics fighting their way to independence in Central and South America when the government of the United States looked upon itself as in some sort the guardian of the republics to the South of her as against any encroachments or efforts at political control from the other side of the water; felt it its duty to play the part even without invitation from them; and I think that we can claim that the task was undertaken with a true and disinterested enthusiasm for the freedom of the Americas and the unmolested Self-government of her independent peoples. But it was always difficult to maintain such a role without offense to the pride of the peoples whose freedom of action we sought to protect, and without provoking serious misconceptions of our motives, and every thoughtful man of affairs must welcome the altered circumstances of the new day in whose light we now stand, when there is no claim of guardianship or thought of wards but, instead, a full and honorable association as of partners between ourselves and our neighbors, in the interest of all America, north and south. Our concern for the independence and prosperity of the states of Central and South America is not altered. We retain unabated the spirit that has inspired us throughout the whole life of our government and which was so frankly put into words by President Monroe. We still mean always to make a common cause of national independence and of political liberty in America. But that purpose is now better understood so far as it concerns ourselves. It is known not to be a selfish purpose. It is known to have in it no thought of taking advantage of any government in this hemisphere or playing its political fortunes for our own benefit. All the governments of America stand, so far as we are concerned, upon a footing of genuine equality and unquestioned independence."
[1] "Woodrow_Wilson_1917_38, Topic 42, gamma = 0.236866900885327"
[1] "We can do this with all the greater zeal and enthusiasm because we know that for us this is a war of high principle, debased by no selfish ambition of conquest or spoliation; because we know, and all the world knows, that we have been forced into it to save the very institutions we five under from corruption and destruction. The purpose of the Central Powers strikes straight at the very heart of everything we believe in; their methods of warfare outrage every principle of humanity and of knightly honor; their intrigue has corrupted the very thought and spirit of many of our people; their sinister and secret diplomacy has sought to take our very territory away from us and disrupt the union of the states. Our safety would be at an end, our honor forever sullied and brought into contempt, were we to permit their triumph. They are striking at the very existence of democracy and liberty."

Correlation between topics

# Count, for every year, how many words were assigned to each topic.
# augment() is used here for its per-row `document`, `count` and `.topic`
# columns.
words <- augment(sotu_lda_50a05, data = sotu_dtm) %>%
  mutate(
    # Document ids look like "Woodrow_Wilson_1915_5": strip the leading name
    # and the trailing index to keep the year. The class is spelled out as
    # A-Za-z because the range A-z also matches [ \ ] ^ and `.
    year = document %>%
      str_replace("[A-Za-z_.]+_", "") %>%
      str_replace("_[0-9]+$", ""),
    # Zero-pad the topic number so the labels sort in numeric order.
    topic = str_c("t_", str_pad(.topic, width = 2, pad = "0"))
  ) %>%
  group_by(year, topic) %>%
  summarize(words = sum(count)) %>%
  ungroup() %>%
  # One row per year, one column per topic; a topic that never occurs in a
  # year gets 0 words rather than NA.
  pivot_wider(names_from = topic, values_from = words,
              names_sort = TRUE, values_fill = 0)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# A topic with no words in a given year shows up as NA; treat it as zero.
words[is.na(words)] <- 0
# Correlate every pair of topic columns across years (cor() defaults to
# Pearson).
correlations <- words %>% select(-year) %>% cor()
# The matrix is symmetric, so blank the upper triangle and plot each pair once.
correlations[upper.tri(correlations)] <- NA
as_tibble(correlations, rownames = "topicX") %>%
  # Long format: one row per (topicX, topicY) pair; the blanked cells are
  # dropped here.
  pivot_longer(-topicX, names_to = "topicY", values_to = "correlation",
               values_drop_na = TRUE) %>%
  ggplot(aes(x = topicX, y = topicY, fill = correlation)) +
    geom_tile() +
    # Diverging palette centred on zero; `limits` is spelled out in full
    # rather than relying on partial argument matching.
    scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                         midpoint = 0, limits = c(-1, 1)) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90),
          legend.position = "bottom") +
    labs(x = "Topic", y = "Topic", fill = "Pearson Correlation")

Topic 35 appears to be negatively correlated with most of the other topics. Let’s see what topic 35 is.

# Print the document id, gamma and text of paragraphs associated with topic 35.
# explore() is not defined in this notebook; presumably it comes from
# functions.R (copied over from Notebook 7).
explore(sotu_lda_50a05, 35)
[1] "Andrew_Johnson_1867_43, Topic 35, gamma = 0.282843216044233"
[1] "Since the commencement of the second session of the Thirty-ninth Congress 510 miles of road have been constructed on the main line and branches of the Pacific Railway. The line from Omaha is rapidly approaching the eastern base of the Rocky Mountains, while the terminus of the last section of constructed road in California, accepted by the Government on the 24th day of October last, was but 11 miles distant from the summit of the Sierra Nevada. The remarkable energy evinced by the companies offers the strongest assurance that the completion of the road from Sacramento to Omaha will not be long deferred."
[1] "Benjamin_Harrison_1889_108, Topic 35, gamma = 0.335039359497305"
[1] "The attention of the Interstate Commerce Commission has been called to the urgent need of Congressional legislation for the better protection of the lives and limbs of those engaged in operating the great interstate freight lines of the country, and especially of the yardmen and brakemen. A petition signed by nearly 10,000 railway brakemen was presented to the Commission asking that steps might be taken to bring about the use of automatic brakes and couplers on freight cars."
[1] "Benjamin_Harrison_1890_84, Topic 35, gamma = 0.321903764659905"
[1] "The present situation of our mail communication with Australia illustrates the importance of early action by Congress. The Oceanic Steamship Company maintains a line of steamers between San Francisco, Sydney, and Auckland consisting of three vessels, two of which are of United States registry and one of foreign registry. For the service done by this line in carrying the mails we pay annually the sum of $46,000, being, as estimated, the full sea and United States inland postage, which is the limit fixed by law. The colonies of New South Wales and New Zealand have been paying annually to these lines lbs. 37,000 for carrying the mails from Sydney and Auckland to San Francisco. The contract under which this payment has been made is now about to expire, and those colonies have refused to renew the contract unless the United States shall pay a more equitable proportion of the whole sum necessary to maintain the service."
[1] "Benjamin_Harrison_1890_94, Topic 35, gamma = 0.340656556943172"
[1] "The use of the telegraph by the Post-Office Department as a means for the rapid transmission of written communications is, I believe, upon proper terms, quite desirable. The Government does not own or operate the railroads, and it should not, I think, own or operate the telegraph lines. It does, however, seem to be quite practicable for the Government to contract with the telegraph companies, as it does with railroad companies, to carry at specified rates such communications as the senders may designate for this method of transmission. I recommend that such legislation be enacted as will enable the Post-Office Department fairly to test by experiment the advantages of such a use of the telegraph."
[1] "Benjamin_Harrison_1892_70, Topic 35, gamma = 0.342097564132645"
[1] "I earnestly urge the continuance of the policy inaugurated by this legislation, and that the appropriations required to meet the obligations of the Government under the contracts may be made promptly, so that the lines that have entered into these engagements may not be embarrassed. We have had, by reason of connections with the transcontinental railway lines constructed through our own territory, some advantages in the ocean trade of the Pacific that we did not possess on the Atlantic. The construction of the Canadian Pacific Railway and the establishment under large subventions from Canada and England of fast steamship service from Vancouver with Japan and China seriously threaten our shipping interests in the Pacific. This line of English steamers receives, as is stated by the Commissioner of Navigation, a direct subsidy of $400,000 annually, or $30,767 per trip for thirteen voyages, in addition to some further aid from the Admiralty in connection with contracts under which the vessels may be used for naval purposes. The competing American Pacific mail line under the act of March 3, 1891, receives only $6,389 per round trip."
[1] "Calvin_Coolidge_1924_23, Topic 35, gamma = 0.316302137701649"
[1] "The railways during the past year have made still further progress in recuperation from the war, with large rains in efficiency and ability expeditiously to handle the traffic of the country. We have now passed through several periods of peak traffic without the car shortages which so frequently in the past have brought havoc to our agriculture and industries. The condition of many of our great freight terminals is still one of difficulty and results in imposing, large costs on the public for inward-bound freight, and on the railways for outward-bound freight. Owing to the growth of our large cities and the great increase in the volume of traffic, particularly in perishables, the problem is not only difficult of solution, but in some cases not wholly solvable by railway action alone."
[1] "Calvin_Coolidge_1927_20, Topic 35, gamma = 0.333330294406473"
[1] "A rapid growth is taking place in aeronautics. The Department of Commerce has charge of the inspection and licensing system and the construction of national airways. Almost 8,000 miles are already completed and about 4,000 miles more contemplated. Nearly 4,400 miles are now equipped and over 3,000 miles more will have lighting and emergency landing fields by next July. Air mail contracts are expected to cover 24 of these lines. Daily airway flying is nearly 15,000 miles and is expected to reach 25,000 miles early next year."
[1] "Calvin_Coolidge_1928_55, Topic 35, gamma = 0.339195425621855"
[1] "Our national airway system exceeds 14,000 miles in length and has 7,500 miles lighted for night operations. Provision has been made for lighting 4,000 miles more during the current fiscal year and equipping an equal mileage with radio facilities. Three-quarters of our people are now served by these routes. With the rapid growth of air mail, express, and passenger service, this new transportation medium is daily becoming a more important factor in commerce. It is noteworthy that this development has taken place without governmental subsidies. Commercial passenger flights operating on schedule have reached 13,000 miles per day."
[1] "Grover_Cleveland_1894_89, Topic 35, gamma = 0.336617402830591"
[1] "The Postmaster-General states that this deficiency is unnecessary and might be obviated at once if the law regulating rates upon mail matter of the second class was modified. The rate received for the transmission of this second-class matter is 1 cent per pound, while the cost of such transmission to the Government is eight times that amount. In the general terms of the law this rate covers newspapers and periodicals. The extensions of the meaning of these terms from time to time have admitted to the privileges intended for legitimate newspapers and periodicals a surprising range of publications and created abuses the cost of which amounts in the aggregate to the total deficiency of the Post-Office Department. Pretended newspapers are started by business houses for the mere purpose of advertising goods, complying with the law in form only and discontinuing the publications as soon as the period of advertising is over. \"Sample copies\" of pretended newspapers are issued in great numbers for a like purpose only. The result is a great loss of revenue to the Government, besides its humiliating use as an agency to aid in carrying out the scheme of a business house to advertise its goods by means of a trick upon both its rival houses and the regular and legitimate newspapers. Paper-covered literature, consisting mainly of trashy novels, to the extent of many thousands of tons is sent through the mails at 1 cent per pound, while the publishers of standard works are required to pay eight times that amount in sending their publications. Another abuse consists in the free carriage through the mails of hundreds of tons of seed and grain uselessly distributed through the Department of Agriculture. 
The Postmaster-General predicts that if the law be so amended as to eradicate these abuses not only will the Post-Office Department show no deficiency, but he believes that in the near future all legitimate newspapers and periodical magazines might be properly transmitted through the mails to their subscribers free of cost. I invite your prompt consideration of this subject and fully indorse the views of the Postmaster-General."
[1] "Grover_Cleveland_1896_89, Topic 35, gamma = 0.275175394441182"
[1] "From each pound of first-class matter .........cents \t93.0 From each pound of second class ...............mills \t8.5 From each pound of third class ...................cents \t13.1 From each pound of fourth class ..................do \t15.6 Of the second class 52,348,297 was county-free matter."
[1] "Herbert_Hoover_1929_69, Topic 35, gamma = 0.379344628819673"
[1] "There are over 3,000,000 miles of legally established highways in the United States, of which about 10 per cent are included in the State highway systems, the remainder being county and other local roads. About 626,000 miles have been improved with some type of surfacing, comprising some 63 per cent of the State highway systems and 16 per cent of the local roads. Of the improved roads about 102,000 miles are hard surfaced, comprising about 22 per cent of the State highway systems and about 8 per cent of the local roads."
[1] "Herbert_Hoover_1929_80, Topic 35, gamma = 0.282765765598028"
[1] "As a whole, the railroads never were in such good physical and financial condition, and the country has never been so well served by them. The greatest volume of freight traffic ever tendered is being carried at a speed never before attained and with satisfaction to the shippers. Efficiencies and new methods have resulted in reduction in the cost of providing freight transportation, and freight rates show a continuous descending line from the level enforced by the World War."
[1] "James_Buchanan_1857_97, Topic 35, gamma = 0.361201790215777"
[1] "The length of post-roads in 1827 was 105,336 miles; in 1837,141,242 miles; in 1847, 153,818 miles, and in the year 1857 there are 242,601 miles of post-road, including 22,530 miles of railroad on which the mails are transported."
[1] "James_Polk_1848_39, Topic 35, gamma = 0.296419498069007"
[1] "The monthly line of mail steamers from Panama to Astoria has been required to \"stop and deliver and take mails at San Diego, Monterey, and San Francisco.\" These mail steamers, connected by the Isthmus of Panama with the line of mail steamers on the Atlantic between New York and Chagres, will establish a regular mail communication with California."
[1] "Martin_van_Buren_1839_38, Topic 35, gamma = 0.298872007207917"
[1] "Some difficulties have arisen in relation to contracts for the transportation of the mails by railroad and steamboat companies. It appears that the maximum of compensation provided by Congress for the transportation of the mails upon railroads is not sufficient to induce some of the companies to convey them at such hours as are required for the accommodation of the public. It is one of the most important duties of the General Government to provide and maintain for the use of the people of the States the best practicable mail establishment. To arrive at that end it is indispensable that the Post-Office Department shall be enabled to control the hours at which the mails shall be carried over railroads, as it now does over all other roads. Should serious inconveniences arise from the inadequacy of the compensation now provided by law, or from unreasonable demands by any of the railroad companies, the subject is of such general importance as to require the prompt attention of Congress."
[1] "Millard_Fillmore_1851_97, Topic 35, gamma = 0.289469584711025"
[1] "At the close of the last fiscal year the length of mail routes within the United States was 196,290 miles, the annual transportation thereon 53,272,252 miles, and the annual cost of such transportation $3,421,754."
[1] "Theodore_Roosevelt_1907_91, Topic 35, gamma = 0.309042291857488"
[1] "We now pay under the act of 1891 $4 a statute mile outward to 20-knot American mail steamships, built according to naval plans, available as cruisers, and manned by Americans. Steamships of that speed are confined exclusively to trans-Atlantic trade with New York. To steamships of 16 knots or over only $2 a mile can be paid, and it is steamships of this speed and type which are needed to meet the requirements of mail service to South America, Asia (including the Philippines), and Australia. I strongly recommend, therefore, a simple amendment to the ocean mail act of 1891 which shall authorize the Postmaster-General in his discretion to enter into contracts for the transportation of mails to the Republics of South America, to Asia, the Philippines, and Australia at a rate not to exceed $4 a mile for steamships of 16 knots speed or upwards, subject to the restrictions and obligations of the act of 1891. The profit of $3,600,000 which has been mentioned will fully cover the maximum annual expenditure involved in this recommendation, and it is believed will in time establish the lines so urgently needed. The proposition involves no new principle, but permits the efficient discharge of public functions now inadequately performed or not performed at all."
[1] "Warren_Harding_1922_15, Topic 35, gamma = 0.317198100721346"
[1] "We have built 40 per cent of the world's railroad mileage, and yet find it inadequate to our present requirements. When we contemplate the inadequacy of to-day it is easy to believe that the next few decades will witness the paralysis of our transportation-using social scheme or a complete reorganization on some new basis. Mindful of the tremendous costs of betterments, extensions, and expansions, and mindful of the staggering debts of the world to-day, the difficulty is magnified. Here is a problem demanding wide vision and the avoidance of mere makeshifts. No matter what the errors of the past, no matter how we acclaimed construction and then condemned operations in the past, we have the transportation and the honest investment in the transportation which sped us on to what we are, and we face conditions which reflect its inadequacy to-day, its greater inadequacy to-morrow, and we contemplate transportation costs which much of the traffic can not and will not continue to pay."
[1] "Warren_Harding_1922_16, Topic 35, gamma = 0.349433041510265"
[1] "Manifestly, we have need to begin on plans to coordinate all transportation facilities. We should more effectively connect up our rail lines with our carriers by sea. We ought to reap some benefit from the hundreds of millions expended on inland waterways, proving our capacity to utilize as well as expend. We ought to turn the motor truck into a railway feeder and distributor instead of a destroying competitor."
[1] "Warren_Harding_1922_17, Topic 35, gamma = 0.325182610132973"
[1] "It would be folly to ignore that we live in a motor age. The motor car reflects our standard of living and gauges the speed of our present-day life. It long ago ran down Simple Living, and never halted to inquire about the prostrate figure which fell as its victim. With full recognition of motor-car transportation we must turn it to the most practical use. It can not supersede the railway lines, no matter how generously we afford it highways out of the Public Treasury. If freight traffic by motor were charged with its proper and proportionate share of highway construction, we should find much of it wasteful and more costly than like service by rail. Yet we have paralleled the railways, a most natural line of construction, and thereby taken away from the agency of expected service much of its profitable traffic, which the taxpayers have been providing the highways, whose cost of maintenance is not yet realized."
[1] "Warren_Harding_1922_24, Topic 35, gamma = 0.287082849316694"
[1] "The merger of lines into systems, a facilitated interchange of freight cars, the economic use of terminals, and the consolidation of facilities are suggested ways of economy and efficiency."
[1] "William_H._Taft_1909_106, Topic 35, gamma = 0.327081000574859"
[1] "The deficit every year in the Post-Office Department is largely caused by the low rate of postage of 1 cent a pound charged on second-class mail matter, which includes not only newspapers, but magazines and miscellaneous periodicals. The actual loss growing out of the transmission of this second-class mail matter at 1 cent a pound amounts to about $63,000,000 a year. The average cost of the transportation of this matter is more than 9 cents a pound."
[1] "William_H._Taft_1909_107, Topic 35, gamma = 0.460938219179577"
[1] "It appears that the average distance over which newspapers are delivered to their customers is 291 miles, while the average haul of magazines is 1,049, and of miscellaneous periodicals 1,128 miles. Thus, the average haul of the magazine is three and one-half times and that of the miscellaneous periodical nearly four times the haul of the daily newspaper, yet all of them pay the same postage rate of 1 cent a pound. The statistics of 1907 show that second-class mail matter constituted 63.91 per cent. of the weight of all the mail, and yielded only 5.19 per cent. of the revenue."
[1] "William_H._Taft_1912_198, Topic 35, gamma = 0.291075271174266"
[1] "It is expected that the establishment of a parcel post on January 1st will largely increase the amount of mail matter to be transported by the railways, and Congress should be prompt to provide a way by which they may receive the additional compensation to which they will be entitled. The Postmaster General urges that the department's plan for a complete readjustment of the system of paying the railways for carrying the mails be adopted, substituting space for weight as the principal factor in fixing compensation. Under this plan it will be possible to determine without delay what additional payment should be made on account of the parcel post. The Postmaster General's recommendation is based on the results of a far-reaching investigation begun early in the administration with the object of determining what it costs the railways to carry the mails. The statistics obtained during the course of the inquiry show that while many of the railways, and particularly the large systems, were making profits from mail transportations, certain of the lines were actually carrying the mails at a loss. As a result of the investigation the department, after giving the subject careful consideration, decided to urge the abandonment of the present plan of fixing compensation on the basis of the weight of the mails carried, a plan that has proved to be exceedingly expensive and in other respects unsatisfactory. Under the method proposed the railway companies will annually submit to the department reports showing what it costs them to carry the mails, and this cost will be apportioned on the basis of the car space engaged, payment to be allowed at the rate thus determined in amounts that will cover the cost and a reasonable profit. If a railway is not satisfied with the manner in which the department apportions the cost in fixing compensation, it is to have the right, tinder the new plan, of appealing to the Interstate Commerce Commission. 
This feature of the proposed law would seem to insure a fair treatment of the railways. It is hoped that Congress will give the matter immediate attention and that the method of compensation recommended by the department or some other suitable plan will be promptly authorized."
[1] "Woodrow_Wilson_1914_13, Topic 35, gamma = 0.326382101325192"
[1] "The case is not unlike that which confronted us when our own continent was to be opened up to settlement and industry, and we needed long lines of railway, extended means of transportation prepared beforehand, if development was not to lag intolerably and wait interminably. We lavishly subsidized the building of transcontinental railroads. We look back upon that with regret now, because the subsidies led to many scandals of which we are ashamed; but we know that the railroads had to be built, and if we had it to do over again we should of course build them, but in another way. Therefore I propose another way of providing the means of transportation, which must precede, not tardily follow, the development of our trade with our neighbor states of America. It may seem a reversal of the natural order of things, but it is true, that the routes of trade must be actually opened-by many ships and regular sailings and moderate charges-before streams of merchandise will flow freely and profitably through them."

Clustering and classification with topics

We can also use the results of topic modeling as inputs for clustering and classification. Instead of using term frequency as our feature set, we can use topic frequency.

Here is how we would cluster documents on the basis of the 50-topic (a = 0.5) model.

# Tag every row of the augmented model output with the year of its address.
# Document ids look like "Woodrow_Wilson_1915_5": strip the leading name and
# the trailing index to keep the year. The class is spelled out as A-Za-z
# because the range A-z also matches [ \ ] ^ and `.
words <- augment(sotu_lda_50a05, data = sotu_dtm) %>%
  mutate(
    year = document %>%
      str_replace("[A-Za-z_.]+_", "") %>%
      str_replace("_[0-9]+$", "")
  )
# Build a year-by-topic matrix whose cells are the number of words assigned to
# that topic in that year; this replaces term frequency as the feature set.
lda_dtm <- words %>%
  group_by(year, .topic) %>%
  summarize(words = sum(count)) %>%
  cast_dtm(year, .topic, words)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Hierarchically cluster the years on their topic counts and draw the
# dendrogram (dist() and hclust() are both called with their defaults).
plot(hclust(dist(lda_dtm)))

Just as we saw in Notebook 5, the 1981 and 1946 State of the Union addresses were very different from the others. This dendrogram suggests to me that the 1846, 1946, and 1981 addresses were unique, and that the others group into three clusters. So let’s cut the tree at six clusters and see what that looks like.

This is the plot_cluster() function from Notebook 5.

# plot_cluster(): Notebook 5 version.
# Projects the precomputed distances `sotu_words_dist` onto two dimensions,
# cuts the precomputed tree `sotu_words_cluster` into `nclust` groups, and
# draws one label per row of `sotu`, coloured by group.
#   nclust: number of clusters to cut the tree into.
plot_cluster <- function(nclust) {
  coords <- data.frame(cmdscale(sotu_words_dist, k = 2))
  coords$cluster <- cutree(sotu_words_cluster, nclust)
  coords$year <- sotu$year
  ggplot(coords, aes(x = X1, y = X2, color = factor(cluster), label = year)) +
    geom_label()
}

I’m just going to make it a bit more general so I can use it here to plot my six clusters.

# plot_cluster(): general version.
# Clusters the rows of `dtm` hierarchically, cuts the tree into `nclust`
# groups, and plots the rows in two dimensions with one label per row,
# coloured by group.
#   dtm:    matrix-like object with one row per address.
#   nclust: number of clusters to cut the tree into.
# Returns a ggplot object.
plot_cluster <- function(dtm, nclust) {
  # Compute the distance matrix once and reuse it for scaling and clustering.
  distances <- dist(dtm)
  # Label points with the row names carried by the distance matrix so the
  # labels cannot drift out of step with the rows. Fall back to sotu$year
  # only when `dtm` has no row names.
  labels <- attr(distances, "Labels")
  if (is.null(labels)) {
    labels <- sotu$year
  }
  data.frame(cmdscale(distances, k = 2)) %>%
    mutate(cluster = cutree(hclust(distances), nclust), year = labels) %>%
    ggplot(aes(x = X1, y = X2, color = factor(cluster), label = year)) +
      geom_label()
}
plot_cluster(lda_dtm, 6)

This is our cluster_words() function from Notebook 5.

I’m also going to make it more general so I can use it with these clusters.

# cluster_words(): list the most distinctive words of each cluster.
# Clusters the rows of `dtm`, attaches the cluster number to the matching row
# of `sotu` (by position), tokenizes the address text, and ranks words within
# each cluster by tf-idf, treating each cluster as one document.
#   dtm:    matrix-like object with one row per row of `sotu`.
#   nclust: number of clusters to cut the tree into.
# Returns one row per cluster with its top-10 tf-idf words (ties kept) joined
# into a single comma-separated string.
cluster_words <- function(dtm, nclust) {
  membership <- cutree(hclust(dist(dtm)), nclust)
  tokens <- unnest_tokens(mutate(sotu, cluster = membership), gram, text)
  # Drop stop words and any token containing a digit.
  kept <- filter(
    tokens,
    !(gram %in% stop_words$word | str_detect(gram, "[:digit:]"))
  )
  scored <- kept %>%
    group_by(cluster) %>%
    count(gram) %>%
    bind_tf_idf(gram, cluster, n)
  scored %>%
    top_n(10, tf_idf) %>%
    summarize(words = str_c(gram, collapse = ", "))
}
cluster_words(lda_dtm, 6)

There are many questions we can ask about these clusters. We can ask what makes them different from one another, or we can ask what makes the addresses within any cluster similar to one another.

We can also use topics as the feature set for a classification model. Here we will use only an SVM to classify the “Other” addresses as either “Democratic” or “Republican,” just as we did in Notebook 6.

library(e1071)
# Derive year, president and party for every row of the augmented model
# output. Document ids look like "Ulysses_S._Grant_1870_9".
words <- augment(sotu_lda_50a05, data = sotu_dtm) %>%
  mutate(
    # Strip the leading name and the trailing index to keep the year. The
    # class is spelled out as A-Za-z because the range A-z also matches
    # [ \ ] ^ and `.
    year = document %>%
      str_replace("[A-Za-z_.]+_", "") %>%
      str_replace("_[0-9]+$", "") %>%
      as.numeric(),
    # Strip the trailing numeric part and turn the remaining underscores
    # into spaces to recover the president's name.
    pres = document %>%
      str_replace("_[0-9\\._]+$", "") %>%
      str_replace_all("_", " "),
    # Presidents in neither party list are labelled "Other".
    party = case_when(
      pres %in% democrats ~ "Democratic",
      pres %in% republicans ~ "Republican",
      TRUE ~ "Other"
    )
  )
# One row per (year, party) and one column per topic holding the number of
# words assigned to that topic; year and party stay as the first two columns.
topics <- words %>%
  mutate(topic = str_c("t_", .topic)) %>%
  group_by(year, topic, party) %>%
  summarize(words = sum(count)) %>%
  ungroup() %>%
  pivot_wider(names_from = topic, values_from = words,
              names_sort = TRUE, values_fill = 0)
`summarise()` has grouped output by 'year', 'topic'. You can override using the `.groups` argument.
# A topic that never occurs in a year is NA after reshaping; count it as zero.
topics[is.na(topics)] <- 0
# Addresses with a known party train the model; the "Other" addresses are the
# ones to classify.
train <- topics[topics$party != "Other", ]
class <- topics[topics$party == "Other", ]
# Features are every column except the identifiers, selected by name so the
# code does not depend on column positions.
model <- svm(select(ungroup(train), -year, -party), factor(train$party))
# Attach each prediction to its address by year instead of assuming that the
# rows of `sotu` and `class` are in the same order.
predictions <- sotu %>%
  filter(party == "Other") %>%
  left_join(
    tibble(
      year = class$year,
      prediction = unname(
        predict(model, select(ungroup(class), -year, -party))
      )
    ),
    by = "year"
  )
table(predictions$pres, predictions$prediction)
                   
                    Democratic Republican
  Andrew Johnson             3          1
  George Washington          8          0
  James Madison              8          0
  James Monroe               3          5
  John Adams                 4          0
  John Quincy Adams          2          2
  John Tyler                 4          0
  Millard Fillmore           2          1
  Thomas Jefferson           8          0
  Zachary Taylor             0          1

Let’s test the model using leave-one-out cross-validation.

# Leave-one-out cross-validation: for every training row, fit the SVM on all
# the other rows and predict the party of the row that was held out.
# Columns 1:2 of `train` identify the row (year, party); columns 3 onwards
# are the topic counts used as features.
loo_train <- ungroup(train)
loo_features <- loo_train[, 3:ncol(loo_train)]
loo_party <- loo_train$party

# seq_len() keeps this a no-op when `train` has no rows (1:0 would iterate),
# and the fold model is local so the full-data `model` is not overwritten.
loo_predicted <- vapply(seq_len(nrow(loo_train)), function(i) {
  fold_model <- svm(loo_features[-i, ], factor(loo_party[-i]))
  as.character(predict(fold_model, loo_features[i, ]))
}, character(1))

# Build the result in one step rather than growing it with rbind() per fold.
predictions <- loo_train[, 1:2] %>%
  mutate(prediction = factor(loo_predicted, levels = sort(unique(loo_party))))
table(predictions$party, predictions$prediction)
            
             Democratic Republican
  Democratic         62         27
  Republican          8         84
LS0tCnRpdGxlOiAiTm90ZWJvb2sgOCAtIFRvcGljIE1vZGVsaW5nIFBhcnQgMiIKYXV0aG9yOiAiRW1pbHkgS2xhbmNoZXIgTWVyY2hhbnQiCmRhdGU6ICJTVFMgMjA1IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIFdvcmtpbmcgd2l0aCB0aGUgNTAtdG9waWMgbW9kZWwKVGhlIHZpZGVvIHRoYXQgYWNjb21wYW5pZXMgdGhpcyBub3RlYm9vayBpcyBhdmFpbGFibGUgYXQgaHR0cHM6Ly91Y2RhdmlzLmJveC5jb20vdi9zdHMtMjA1LW5vdGVib29rLTguCgpJbiB0aGlzIG5vdGVib29rLCB3ZSB3aWxsIGJlIGRvaW5nIG1vcmUgd29yayB3aXRoIHRoZSA1MC10b3BpYyBtb2RlbCB3ZSBtYWRlIGZvciB0aGUgU3RhdGUgb2YgdGhlIFVuaW9uIEFkZHJlc3NlcyBhdCB0aGUgZW5kIG9mIE5vdGVib29rIDcgYW5kIGxvb2tpbmcgYXQgc29tZSBtb3JlIHdheXMgdG8gZXhwbG9yZSB0aGUgcmVzdWx0cy4gTWFrZSBzdXJlIHRoYXQgeW91IGhhdmUgY29waWVkIGFsbCBmdW5jdGlvbnMgZnJvbSBOb3RlYm9vayA3IGludG8geW91ciBgZnVuY3Rpb25zLlJgIGZpbGUuCgpTdGFydCBieSBsb2FkaW5nIHBhY2thZ2VzIGFuZCBzb3VyY2luZyBmdW5jdGlvbnMuCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeSh0aWR5dGV4dCkKbGlicmFyeSh0ZXh0c3RlbSkKbGlicmFyeSh0b3BpY21vZGVscykKI2luc3RhbGwucGFja2FnZXMoIndvcmRjbG91ZCIpCmxpYnJhcnkod29yZGNsb3VkKQpzb3VyY2UoImZ1bmN0aW9ucy5SIikKYGBgCk5vdyBidWlsZCB0aGUgYHNvdHVgIGRhdGEgZnJhbWUgYW5kIHRoZSBgc290dV9kdG1gIGRvY3VtZW50LXRlcm0gbWF0cml4IGp1c3QgbGlrZSB3ZSBkaWQgaW4gdGhlIGxhc3Qgbm90ZWJvb2ssIGJ1dCBhbHNvIGFkZCBwYXJ0eSB0byBgc290dWAgKGRlbW9jcmF0aWMsIHJlcHVibGljLCBvdGhlcikuCmBgYHtyfQpzb3R1IDwtIG1ha2Vfc290dSgpCiNBZGQgcGFydHkKcmVwdWJsaWNhbnMgPC0gYygiQWJyYWhhbSBMaW5jb2xuIiwgIlVseXNzZXMgUy4gR3JhbnQiLCAiUnV0aGVyZm9yZCBCLiBIYXllcyIsIAogICAgICAgICAgICAgICAgICJKYW1lcyBHYXJmaWVsZCIsICJDaGVzdGVyIEEuIEFydGh1ciIsICJCZW5qYW1pbiBIYXJyaXNvbiIsIAogICAgICAgICAgICAgICAgICJXaWxsaWFtIE1jS2lubGV5IiwgIlRoZW9kb3JlIFJvb3NldmVsdCIsICJXaWxsaWFtIEguIFRhZnQiLCAKICAgICAgICAgICAgICAgICAiV2FycmVuIEhhcmRpbmciLCAiQ2FsdmluIENvb2xpZGdlIiwgIkhlcmJlcnQgSG9vdmVyIiwgCiAgICAgICAgICAgICAgICAgIkR3aWdodCBELiBFaXNlbmhvd2VyIiwgIlJpY2hhcmQgTml4b24iLCAiR2VyYWxkIFIuIEZvcmQiLCAKICAgICAgICAgICAgICAgICAiUm9uYWxkIFJlYWdhbiIsICJHZW9yZ2UgSC5XLiBCdXNoIiwgIkdlb3JnZSBXLiBCdXNoIiwgIkRvbmFsZCBKLiBUcnVtcCIpCmRlbW9jcmF0cyA8LSBjKCJBbmRyZXcgSmFja3NvbiIsICJNYXJ0aW4gdmFuIEJ1cmVuIiwgIkph
bWVzIFBvbGsiLCAKICAgICAgICAgICAgICAgIkZyYW5rbGluIFBpZXJjZSIsICJKYW1lcyBCdWNoYW5hbiIsICJHcm92ZXIgQ2xldmVsYW5kIiwgCiAgICAgICAgICAgICAgICJXb29kcm93IFdpbHNvbiIsICJGcmFua2xpbiBELiBSb29zZXZlbHQiLCAiSGFycnkgUy4gVHJ1bWFuIiwgCiAgICAgICAgICAgICAgICJKb2huIEYuIEtlbm5lZHkiLCAiTHluZG9uIEIuIEpvaG5zb24iLCAiSmltbXkgQ2FydGVyIiwKICAgICAgICAgICAgICAgIldpbGxpYW0gSi4gQ2xpbnRvbiIsICJCYXJhY2sgT2JhbWEiKQpzb3R1JHBhcnR5IDwtIGlmZWxzZShzb3R1JHByZXMgJWluJSByZXB1YmxpY2FucywgIlJlcHVibGljYW4iLAogICAgICAgICAgICAgIGlmZWxzZShzb3R1JHByZXMgJWluJSBkZW1vY3JhdHMsICJEZW1vY3JhdGljIiwgIk90aGVyIikpCgojQ2h1bmsgaW50byBwYXJhZ3JhcGhzCnNvdHVfcGFyYWdyYXBocyA8LSBkYXRhLmZyYW1lKCkKZm9yKGkgaW4gMTpucm93KHNvdHUpKSB7CiAgdGV4dCA8LSBzdHJfc3BsaXQoc290dSR0ZXh0W2ldLCAiIDxwPiAiKSAlPiUgdW5saXN0CiAgcGFyYWdyYXBocyA8LSB0aWJibGUodGV4dCkgJT4lIAogICAgICAgICAgICAgICAgbXV0YXRlKGlkID0gc3RyX2Moc3RyX3JlcGxhY2VfYWxsKHNvdHUkcHJlc1tpXSwgIiAiLCAiXyIpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICJfIiwgc290dSR5ZWFyW2ldLCAiXyIsIDE6bGVuZ3RoKHRleHQpKSkKICBzb3R1X3BhcmFncmFwaHMgPC0gcmJpbmQoc290dV9wYXJhZ3JhcGhzLCBwYXJhZ3JhcGhzKQp9CgojVG9rZW5pemUgYnkgd29yZHMgYW5kIHJlbW92ZSBzdG9wd29yZHMgYW5kIHdvcmRzIHdpdGggZGlnaXRzCnNvdHVfd29yZHMgPC0gc290dV9wYXJhZ3JhcGhzICU+JSB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpICU+JQogICAgICAgICAgICAgICAgYW50aV9qb2luKHN0b3Bfd29yZHMpICU+JSBmaWx0ZXIoIXN0cl9kZXRlY3Qod29yZCwgIls6ZGlnaXQ6XSIpKQoKI0xlbW1hdGl6ZQpzb3R1X2xlbW1hcyA8LSB0aWJibGUod29yZCA9IHVuaXF1ZShzb3R1X3dvcmRzJHdvcmQpKSAlPiUgCiAgICAgICAgICAgICAgICBtdXRhdGUobGVtbWEgPSBsZW1tYXRpemVfd29yZHMod29yZCkpCgojQ2FzdCBkb2N1bWVudC10ZXJtIG1hdHJpeApzb3R1X2R0bSA8LSBsZWZ0X2pvaW4oc290dV93b3Jkcywgc290dV9sZW1tYXMpICU+JSAKICAgICAgICAgICAgICAgIGdyb3VwX2J5KGlkKSAlPiUgY291bnQobGVtbWEpICU+JSBjYXN0X2R0bShpZCwgbGVtbWEsIG4pCmBgYApSZWFkIGluIHRoZSA1MC10b3BpYyBMREEgbW9kZWwgd2UgbWFkZSBhdCB0aGUgZW5kIG9mIE5vdGVib29rIDcuCmBgYHtyfQpzb3R1X2xkYV81MGExIDwtIHJlYWRSRFMoInNvdHVfbGRhXzUwX2ExLlJEUyIpCmBgYApXZSB3aWxsIHN0YXJ0IGJ5IHVzaW5nIHRoZSBmdW5jdGlvbnMgd2UgbWFkZSBsYXN0IHdlZWsgdG8gZXhwbG9yZSB0aGUgdG9waWMgbW9kZWwuIFNpbmNlIHdlIGFy
ZSB3b3JraW5nIHdpdGggbWFueSB0b3BpY3Mgbm93LCB3ZSB3aWxsIG1ha2UgcG5nIGZpbGVzIGZvciBlYWNoIG9mIHRoZSBncmFwaHMuCmBgYHtyfQpwbmcoInRvcGljcy5wbmciLCBoZWlnaHQgPSAxNSwgd2lkdGggPSAxNSwgdW5pdHMgPSAiaW4iLCByZXMgPSAxMDApCiAgdG9waWNzXzUoc290dV9sZGFfNTBhMSkKZGV2Lm9mZigpCnBuZygidG9waWNzX2J5X3llYXIucG5nIiwgaGVpZ2h0ID0gMTUsIHdpZHRoID0gMTUsIHVuaXRzID0gImluIiwgcmVzID0gMTAwKQogIHRvcGljc195ZWFycyhzb3R1X2xkYV81MGExKQpkZXYub2ZmKCkKcG5nKCJobWFwXzUwX3RvcGljcy5wbmciLCBoZWlnaHQgPSAxNSwgd2lkdGggPSAxNSwgdW5pdHMgPSAiaW4iLCByZXMgPSAxMDApCiAgaG1hcChzb3R1X2xkYV81MGExLCBzb3R1X2R0bSwgNTApCmRldi5vZmYoKQpgYGAKIVtdKHRvcGljcy5wbmcpCiFbXSh0b3BpY3NfYnlfeWVhci5wbmcpCiFbXShobWFwXzUwX3RvcGljcy5wbmcpCgpJJ20gZ29pbmcgdG8gdHJ5IHR1cm5pbmcgYWxwaGEgZG93biBhIGJpdCBtb3JlLCB0aGlzIHRpbWUgdG8gMC41LgpgYGB7cn0KIyBzb3R1X2xkYV81MGEwNSA8LSBMREEoc290dV9kdG0sIGsgPSA1MCwgY29udHJvbCA9IGxpc3QoYWxwaGEgPSAwLjUpKQojIHNhdmVSRFMoc290dV9sZGFfNTBhMDUsICJzb3R1X2xkYV81MF9hMDUuUkRTIikKCnNvdHVfbGRhXzUwYTA1IDwtIHJlYWRSRFMoInNvdHVfbGRhXzUwX2EwNS5SRFMiKQpwbmcoInRvcGljc19hMDUucG5nIiwgaGVpZ2h0ID0gMTUsIHdpZHRoID0gMTUsIHVuaXRzID0gImluIiwgcmVzID0gMTAwKQogIHRvcGljc181KHNvdHVfbGRhXzUwYTA1KQpkZXYub2ZmKCkKcG5nKCJ0b3BpY3NfYnlfeWVhcl9hMDUucG5nIiwgaGVpZ2h0ID0gMTUsIHdpZHRoID0gMTUsIHVuaXRzID0gImluIiwgcmVzID0gMTAwKQogIHRvcGljc195ZWFycyhzb3R1X2xkYV81MGEwNSkKZGV2Lm9mZigpCnBuZygiaG1hcF81MF90b3BpY3NfYTA1LnBuZyIsIGhlaWdodCA9IDE1LCB3aWR0aCA9IDE1LCB1bml0cyA9ICJpbiIsIHJlcyA9IDEwMCkKICBobWFwKHNvdHVfbGRhXzUwYTA1LCBzb3R1X2R0bSwgNTApCmRldi5vZmYoKQpgYGAKIVtdKHRvcGljc19hMDUucG5nKQohW10odG9waWNzX2J5X3llYXJfYTA1LnBuZykKIVtdKGhtYXBfNTBfdG9waWNzX2EwNS5wbmcpCgojIyBFeHBsb3JlIHRvcGljcyBpbiBkZXRhaWwKClRvIGV4cGxvcmUgdGhlIHRvcGljcyBpbiBtb3JlIGRldGFpbCwgSSdtIGdvaW5nIHRvIG1ha2UgYSB3b3JkIGNsb3VkIGZvciBlYWNoIG9uZSAoYmUgc3VyZSB0byBjcmVhdGUgdGhlIGZvbGRlciAid29yZGNsb3VkcyIgaW4geW91ciB3b3JraW5nIGRpcmVjdG9yeSBiZWZvcmUgcnVubmluZyB0aGlzLgpgYGB7cn0Kd29yZF9jbG91ZHMgPC0gZnVuY3Rpb24obGRhKSB7CiAgYmV0YSA8LSB0aWR5KGxkYSwgbWF0cml4ID0gImJldGEiKQogIGZvciAoaSBpbiB1bmlxdWUoYmV0YSR0b3BpYykpIHsKICAgIHBuZyhzdHJf
Yygid29yZGNsb3Vkcy90IiwgaSwgIi5wbmciKSwgaGVpZ2h0ID0gMywgd2lkdGggPSAzLCB1bml0cyA9ICJpbiIsIHJlcyA9IDEwMCkKICAgICAgd29yZGNsb3VkKHdvcmRzID0gYmV0YVtiZXRhJHRvcGljID09IGksIF0kdGVybSwgCiAgICAgICAgICAgICAgICBmcmVxID0gYmV0YVtiZXRhJHRvcGljID09IGksIF0kYmV0YSwgCiAgICAgICAgICAgICAgICBtYXgud29yZHMgPSAyMDApCiAgICBkZXYub2ZmKCkKICB9Cn0Kd29yZF9jbG91ZHMoc290dV9sZGFfNTBhMDUpCmBgYAoKIVtdKHdvcmRjbG91ZHMvdDEucG5nKQpJJ20gYWxzbyBnb2luZyB0byBncmFwaCB0aGUgb2NjdXJyZW5jZSBvZiBlYWNoIHRvcGljIG92ZXIgdGltZSBhbmQgYnkgdGhlIHBhcnR5IG9mIHRoZSBwcmVzaWRlbnQgd2hvIGdhdmUgdGhlIGFkZHJlc3MgKGNyZWF0ZSBhIGZvbGRlciBjYWxsZWQgImNocm9ub2xvZ3kiIGJlZm9yZSBydW5uaW5nIHRoaXMgY2h1bmspLgpgYGB7cn0KY2hyb25vbG9neSA8LSBmdW5jdGlvbihsZGEpIHsKICB3b3JkcyA8LSBhdWdtZW50KGxkYSwgZGF0YSA9IHNvdHVfZHRtKSAKICB3b3JkcyR5ZWFyIDwtIHdvcmRzJGRvY3VtZW50ICU+JSBzdHJfcmVwbGFjZSgiW0Etel9cXC5dK18iLCAiIikgJT4lIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0cl9yZXBsYWNlKCJfWzAtOV0rJCIsICIiKSAlPiUgYXMubnVtZXJpYwogIHdvcmRzJHByZXMgPC0gd29yZHMkZG9jdW1lbnQgJT4lIHN0cl9yZXBsYWNlKCJfWzAtOVxcLl9dKyQiLCAiIikgJT4lCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc3RyX3JlcGxhY2VfYWxsKCJfIiwgIiAiKQogIHdvcmRzJHBhcnR5IDwtIGlmZWxzZSh3b3JkcyRwcmVzICVpbiUgZGVtb2NyYXRzLCAiRGVtb2NyYXRpYyIsCiAgICAgICAgICAgICAgICAgaWZlbHNlKHdvcmRzJHByZXMgJWluJSByZXB1YmxpY2FucywgIlJlcHVibGljYW4iLCAiT3RoZXIiKSkKICB3b3JkcyA8LSB3b3JkcyAlPiUKICAgICAgICAgICAgZ3JvdXBfYnkoeWVhciwgcGFydHksIC50b3BpYykgJT4lIHN1bW1hcml6ZShuID0gc3VtKGNvdW50KSkgJT4lCiAgICAgICAgICAgIHVuZ3JvdXAgJT4lIGdyb3VwX2J5KHllYXIpICU+JSBtdXRhdGUocCA9IG4vc3VtKG4pKQogIGZvciAoaSBpbiB1bmlxdWUod29yZHMkLnRvcGljKSkgewogICAgZ3JhcGggPC0gZ2dwbG90KHdvcmRzW3dvcmRzJC50b3BpYyA9PSBpLCBdLCBhZXMoeCA9IHllYXIsIHkgPSBwLCBjb2xvciA9IHBhcnR5KSkgKyAKICAgICAgZ2VvbV9wb2ludCgpICsKICAgICAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoImJsdWUiLCAiYmxhY2siLCAicmVkIikpICsgZ3VpZGVzKGNvbG9yID0gIm5vbmUiKSArCiAgICAgIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQpICsKICAgICAgdGhlbWVfbWluaW1hbCgpICsKICAgICAgbGFicyh4ID0gIlllYXIiLCB5ID0gIlBlcmNlbnQgb2YgV29yZHMiLCB0
aXRsZSA9IHN0cl9jKCJUb3BpYyAiLCBpKSkKICAgIHBuZyhzdHJfYygiY2hyb25vbG9neS90IiwgaSwgIi5wbmciKSwgaGVpZ2h0ID0gMywgd2lkdGggPSAzLCB1bml0cyA9ICJpbiIsIHJlcyA9IDEwMCkKICAgICAgICBwcmludChncmFwaCkKICAgIGRldi5vZmYoKQogIH0KfQpjaHJvbm9sb2d5KHNvdHVfbGRhXzUwYTA1KQpgYGAKIVtdKGNocm9ub2xvZ3kvdDEucG5nKQoKSSdtIGN1cmlvdXMgYWJvdXQgdG9waWMgMjUgZnJvbSBzb3R1X2xkYV81MF9hMDUuIExldCdzIGxvb2sgYXQgdGhlIHBhcmFncmFwaHMgdGhhdCBhcmUgbW9zdCByZXByZXNlbnRhdGl2ZSBvZiB0aGF0IHRvcGljLgpgYGB7cn0KZ2FtbWEgPC0gdGlkeShzb3R1X2xkYV81MGEwNSwgbWF0cml4ID0gImdhbW1hIikgJT4lIAogICAgICAgICAgICBmaWx0ZXIodG9waWMgPT0gMjUpICU+JSBhcnJhbmdlKC1nYW1tYSkgJT4lCiAgICAgICAgICAgIHRvcF9uKDI1LCBnYW1tYSkgJT4lIAogICAgICAgICAgICBsZWZ0X2pvaW4oc290dV9wYXJhZ3JhcGhzLCBieSA9IGMoImRvY3VtZW50IiA9ICJpZCIpKQpmb3IoaSBpbiAxOjI1KSB7CiAgcHJpbnQoc3RyX2MoZ2FtbWEkZG9jdW1lbnRbaV0sICIsIFRvcGljICIsIGdhbW1hJHRvcGljW2ldLCAiLCBnYW1tYSA9ICIsIGdhbW1hJGdhbW1hW2ldKSkKICBwcmludChnYW1tYSR0ZXh0W2ldKQp9CmBgYApMb29raW5nIGF0IHRoZXNlIHBhcmFncmFwaHMgbWlnaHQgZ2l2ZSBtZSBhIHNlbnNlIG9mIGhvdyB0aGUgImZyZWUgd29ybGQiIHJoZXRvcmljIG9mIHRoZSBDb2xkIFdhciBvcmlnaW5hdGVkIGFuZCB3aGF0IGl0cyBhbnRlY2VkZW50cyB3ZXJlLgoKV2UgY2FuIHdyaXRlIGEgZ2VuZXJhbCBmdW5jdGlvbiB0byBleHBsb3JlIGFueSB0b3BpYyBpbiB0aGUgc2FtZSB3YXkuCmBgYHtyfQpleHBsb3JlIDwtIGZ1bmN0aW9uKGxkYSwgdCkgewogIGdhbW1hIDwtIHRpZHkobGRhLCBtYXRyaXggPSAiZ2FtbWEiKSAlPiUgZmlsdGVyKHRvcGljID09IHQpICU+JSAKICAgICAgICAgICAgdG9wX24oMjUsIGdhbW1hKSAlPiUgIGxlZnRfam9pbihzb3R1X3BhcmFncmFwaHMsIGJ5ID0gYygiZG9jdW1lbnQiID0gImlkIikpCiAgZm9yKGkgaW4gMToyNSkgewogICAgcHJpbnQoc3RyX2MoZ2FtbWEkZG9jdW1lbnRbaV0sICIsIFRvcGljICIsIGdhbW1hJHRvcGljW2ldLCAiLCBnYW1tYSA9ICIsIGdhbW1hJGdhbW1hW2ldKSkKICAgIHByaW50KGdhbW1hJHRleHRbaV0pCiAgfQp9CiNDYWxsIHRoZSBmdW5jdGlvbiBmb3IgdG9waWMgNDIgdG8gc2VlIHBhcmFncmFwaHMgYWJvdXQgaGVhbHRoIGNhcmUKZXhwbG9yZShzb3R1X2xkYV81MGEwNSwgNDIpCmBgYAoKIyMgQ29ycmVsYXRpb24gYmV0d2VlbiB0b3BpY3MKCmBgYHtyfQp3b3JkcyA8LSBhdWdtZW50KHNvdHVfbGRhXzUwYTA1LCBkYXRhID0gc290dV9kdG0pCndvcmRzJHllYXIgPC0gd29yZHMkZG9jdW1lbnQgJT4lIAogICAgICAgICAgICAgICAgc3RyX3JlcGxhY2UoIltBLXpfXFwuXStf
IiwgIiIpICU+JSAKICAgICAgICAgICAgICAgIHN0cl9yZXBsYWNlKCJfWzAtOV0rJCIsICIiKQp3b3JkcyA8LSB3b3JkcyAlPiUgbXV0YXRlKHRvcGljID0gaWZlbHNlKC50b3BpYyA8IDEwLCBzdHJfYygidF8wIiwgLnRvcGljKSwgc3RyX2MoInRfIiwgLnRvcGljKSkpICU+JQogICAgICAgICAgICAgICAgICBncm91cF9ieSh5ZWFyLCB0b3BpYykgJT4lIHN1bW1hcml6ZSh3b3JkcyA9IHN1bShjb3VudCkpICU+JQogICAgICAgICAgICAgICAgICBzcHJlYWQodG9waWMsIHdvcmRzKSAlPiUgdW5ncm91cAp3b3Jkc1tpcy5uYSh3b3JkcyldIDwtIDAKY29ycmVsYXRpb25zIDwtIHdvcmRzICU+JSBzZWxlY3QoLXllYXIpICU+JSBjb3IoKQpjb3JyZWxhdGlvbnNbdXBwZXIudHJpKGNvcnJlbGF0aW9ucyldIDwtIE5BCmRhdGEuZnJhbWUoY29ycmVsYXRpb25zKSAlPiUgbXV0YXRlKHRvcGljWCA9IHJvd25hbWVzKGNvcnJlbGF0aW9ucykpICU+JQogICAgICBnYXRoZXIoInRvcGljWSIsICJjb3JyZWxhdGlvbiIsIC10b3BpY1gpICU+JQogICAgICBmaWx0ZXIoIWlzLm5hKGNvcnJlbGF0aW9uKSkgICU+JSAgICAgICAgICAgICAgCiAgZ2dwbG90KGFlcyh4ID0gdG9waWNYLCB5ID0gdG9waWNZLCBmaWxsID0gY29ycmVsYXRpb24pKSArIGdlb21fdGlsZSgpICsKICAgIHNjYWxlX2ZpbGxfZ3JhZGllbnQyKGxvdyA9ICJibHVlIiwgaGlnaCA9ICJyZWQiLCBtaWQgPSAid2hpdGUiLCBtaWRwb2ludCA9IDAsIGxpbWl0ID0gYygtMSwgMSkpICsKICAgIHRoZW1lX21pbmltYWwoKSArIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gOTApLCBsZWdlbmQucG9zaXRpb24gPSAiYm90dG9tIikgKwogICAgbGFicyh4ID0gIlRvcGljIiwgeSA9ICJUb3BpYyIsIGZpbGwgPSAiUGVhcnNvbiBDb3JyZWxhdGlvbiIpCmBgYApUb3BpYyAzNSBhcHBlYXJzIHRvIGJlIG5lZ2F0aXZlbHkgY29ycmVsYXRlZCB3aXRoIG1vc3Qgb2YgdGhlIG90aGVyIHRvcGljcy4gTGV0J3Mgc2VlIHdoYXQgdG9waWMgMzUgaXMuCmBgYHtyfQpleHBsb3JlKHNvdHVfbGRhXzUwYTA1LCAzNSkKYGBgCgoKIyMgQ2x1c3RlcmluZyBhbmQgY2xhc3NpZmljYXRpb24gd2l0aCB0b3BpY3MKCldlIGNhbiBhbHNvIHVzZSB0aGUgcmVzdWx0cyBvZiB0b3BpYyBtb2RlbGluZyBhcyBpbnB1dHMgZm9yIGNsdXN0ZXJpbmcgYW5kIGNsYXNzaWZpY2F0aW9uLiBJbnN0ZWFkIG9mIHVzaW5nICoqdGVybSBmcmVxdWVuY3kqKiBhcyBvdXIgZmVhdHVyZSBzZXQsIHdlIGNhbiB1c2UgKip0b3BpYyBmcmVxdWVuY3kqKi4KCkhlcmUgaXMgaG93IHdlIHdvdWxkIGNsdXN0ZXIgZG9jdW1lbnRzIG9uIHRoZSBiYXNpcyBvZiB0aGUgNTAtdG9waWMgKGEgPSAwLjUpIG1vZGVsLgpgYGB7cn0Kd29yZHMgPC0gYXVnbWVudChzb3R1X2xkYV81MGEwNSwgZGF0YSA9IHNvdHVfZHRtKQp3b3JkcyR5ZWFyIDwtIHdvcmRzJGRvY3VtZW50ICU+JSAKICAgICAgICAgICAgICAgIHN0cl9yZXBsYWNl
KCJbQS16X1xcLl0rXyIsICIiKSAlPiUgCiAgICAgICAgICAgICAgICBzdHJfcmVwbGFjZSgiX1swLTldKyQiLCAiIikKbGRhX2R0bSA8LSB3b3JkcyAlPiUgZ3JvdXBfYnkoeWVhciwgLnRvcGljKSAlPiUgCiAgICAgICAgICAgICAgc3VtbWFyaXplKHdvcmRzID0gc3VtKGNvdW50KSkgJT4lIGNhc3RfZHRtKHllYXIsIC50b3BpYywgd29yZHMpCnBsb3QoaGNsdXN0KGRpc3QobGRhX2R0bSkpKQpgYGAKSnVzdCBhcyB3ZSBzYXcgaW4gTm90ZWJvb2sgNSwgdGhlIDE5ODEgYW5kIDE5NDYgU3RhdGUgb2YgdGhlIFVuaW9uIGFkZHJlc3NlcyB3ZXJlIHZlcnkgZGlmZmVyZW50IGZyb20gdGhlIG90aGVycy4gVGhpcyBkZW5kcm9ncmFtIHN1Z2dlc3RzIHRvIG1lIHRoYXQgdGhlIDE4NDYsIDE5NDYsIGFuZCAxOTgxIGFkZHJlc3NlcyB3ZXJlIHVuaXF1ZSwgYW5kIHRoYXQgdGhlIG90aGVycyBncm91cCBpbnRvIHRocmVlIGNsdXN0ZXJzLiBTbyBsZXQncyBjdXQgdGhlIHRyZWUgYXQgc2l4IGNsdXN0ZXJzIGFuZCBzZWUgd2hhdCB0aGF0IGxvb2tzIGxpa2UuCgpUaGlzIGlzIHRoZSBgcGxvdF9jbHVzdGVyKClgIGZ1bmN0aW9uIGZyb20gTm90ZWJvb2sgNS4KYGBge3J9CnBsb3RfY2x1c3RlciA8LSBmdW5jdGlvbihuY2x1c3QpIHsKICBkYXRhLmZyYW1lKGNtZHNjYWxlKHNvdHVfd29yZHNfZGlzdCwgayA9IDIpKSAlPiUgCiAgICBtdXRhdGUoY2x1c3RlciA9IGN1dHJlZShzb3R1X3dvcmRzX2NsdXN0ZXIsIG5jbHVzdCksIHllYXIgPSBzb3R1JHllYXIpICU+JQogIGdncGxvdChhZXMoeCA9IFgxLCB5ID0gWDIsIGNvbG9yID0gZmFjdG9yKGNsdXN0ZXIpLCBsYWJlbCA9IHllYXIpKSArIGdlb21fbGFiZWwoKQp9CmBgYApJJ20ganVzdCBnb2luZyB0byBtYWtlIGl0IGEgYml0IG1vcmUgZ2VuZXJhbCBzbyBJIGNhbiB1c2UgaXQgaGVyZSB0byBwbG90IG15IHNpeCBjbHVzdGVycy4KYGBge3J9CnBsb3RfY2x1c3RlciA8LSBmdW5jdGlvbihkdG0sIG5jbHVzdCkgewogIGRhdGEuZnJhbWUoY21kc2NhbGUoZGlzdChkdG0pLCBrID0gMikpICU+JQogICAgbXV0YXRlKGNsdXN0ZXIgPSBjdXRyZWUoaGNsdXN0KGRpc3QoZHRtKSksIG5jbHVzdCksIHllYXIgPSBzb3R1JHllYXIpICU+JQogIGdncGxvdChhZXMoeCA9IFgxLCB5ID0gWDIsIGNvbG9yID0gZmFjdG9yKGNsdXN0ZXIpLCBsYWJlbCA9IHllYXIpKSArIGdlb21fbGFiZWwoKQp9CnBsb3RfY2x1c3RlcihsZGFfZHRtLCA2KQpgYGAKVGhpcyBpcyBvdXIgYGNsdXN0ZXJfd29yZHMoKWAgZnVuY3Rpb24gZnJvbSBOb3RlYm9vayA1LgpgYGB7cn0KY2x1c3Rlcl93b3JkcyA8LSBmdW5jdGlvbihuY2x1c3QpIHsKICBzb3R1ICU+JSBtdXRhdGUoY2x1c3RlciA9IGN1dHJlZShzb3R1X3dvcmRzX2NsdXN0ZXIsIG5jbHVzdCkpICU+JQogICAgdW5uZXN0X3Rva2VucyhncmFtLCB0ZXh0KSAlPiUgZmlsdGVyKGdyYW0gJWluJSB0b3BfdGhvdXNhbmQkZ3JhbSkgJT4lCiAgICBncm91cF9ieShjbHVzdGVyKSAl
PiUgY291bnQoZ3JhbSkgJT4lIGJpbmRfdGZfaWRmKGdyYW0sIGNsdXN0ZXIsIG4pICU+JQogICAgZ3JvdXBfYnkoY2x1c3RlcikgJT4lIHRvcF9uKDEwLCB0Zl9pZGYpICU+JSBzdW1tYXJpemUod29yZHMgPSBzdHJfYyhncmFtLCBjb2xsYXBzZSA9ICIsICIpKQp9CmBgYApJJ20gYWxzbyBnb2luZyB0byBtYWtlIGl0IG1vcmUgZ2VuZXJhbCBzbyBJIGNhbiB1c2UgaXQgZm9yIHRoaXMgY2x1c3Rlci4KYGBge3J9CmNsdXN0ZXJfd29yZHMgPC0gZnVuY3Rpb24oZHRtLCBuY2x1c3QpIHsKICBzb3R1ICU+JSBtdXRhdGUoY2x1c3RlciA9IGN1dHJlZShoY2x1c3QoZGlzdChkdG0pKSwgbmNsdXN0KSkgJT4lCiAgICAgICAgICAgdW5uZXN0X3Rva2VucyhncmFtLCB0ZXh0KSAlPiUgCiAgICAgICAgICAgZmlsdGVyKCFncmFtICVpbiUgc3RvcF93b3JkcyR3b3JkICYgIXN0cl9kZXRlY3QoZ3JhbSwgIls6ZGlnaXQ6XSIpKSAlPiUKICAgICAgICAgICBncm91cF9ieShjbHVzdGVyKSAlPiUgY291bnQoZ3JhbSkgJT4lIGJpbmRfdGZfaWRmKGdyYW0sIGNsdXN0ZXIsIG4pICU+JQogICAgICAgICAgIHRvcF9uKDEwLCB0Zl9pZGYpICU+JSBzdW1tYXJpemUod29yZHMgPSBzdHJfYyhncmFtLCBjb2xsYXBzZSA9ICIsICIpKQp9CmNsdXN0ZXJfd29yZHMobGRhX2R0bSwgNikKYGBgClRoZXJlIGFyZSBtYW55IHF1ZXN0aW9ucyB3ZSBjYW4gYXNrIGFib3V0IHRoZXNlIGNsdXN0ZXJzLiBXZSBjYW4gYXNrIHdoYXQgbWFrZXMgdGhlbSBkaWZmZXJlbnQgZnJvbSBvbmUgYW5vdGhlciwgb3Igd2UgY2FuIGFzayB3aGF0IG1ha2VzIHRoZSBhZGRyZXNzZXMgd2l0aGluIGFueSBjbHVzdGVyIHNpbWlsYXIgdG8gb25lIGFub3RoZXIuCgpXZSBjYW4gYWxzbyB1c2UgdG9waWNzIGFzIHRoZSBmZWF0dXJlIHNldCBmb3IgYSBjbGFzc2lmaWNhdGlvbiBtb2RlbC4gSGVyZSB3ZSB3aWxsIGRvIGp1c3QgU1ZNIHRvIGNsYXNzaWZ5IHRoZSAiT3RoZXIiIGFkZHJlc3NlcyBhcyBlaXRoZXIgIkRlbW9jcmF0aWMiIG9yICJSZXB1YmxpY2FuLCIganVzdCBhcyB3ZSBkaWQgaW4gTm90ZWJvb2sgNi4KYGBge3J9CmxpYnJhcnkoZTEwNzEpCndvcmRzIDwtIGF1Z21lbnQoc290dV9sZGFfNTBhMDUsIGRhdGEgPSBzb3R1X2R0bSkgCiAgd29yZHMkeWVhciA8LSB3b3JkcyRkb2N1bWVudCAlPiUgc3RyX3JlcGxhY2UoIltBLXpfXFwuXStfIiwgIiIpICU+JSAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfcmVwbGFjZSgiX1swLTldKyQiLCAiIikgJT4lIGFzLm51bWVyaWMKICB3b3JkcyRwcmVzIDwtIHdvcmRzJGRvY3VtZW50ICU+JSBzdHJfcmVwbGFjZSgiX1swLTlcXC5fXSskIiwgIiIpICU+JQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0cl9yZXBsYWNlX2FsbCgiXyIsICIgIikKICB3b3JkcyRwYXJ0eSA8LSBpZmVsc2Uod29yZHMkcHJlcyAlaW4lIGRlbW9jcmF0cywgIkRlbW9jcmF0aWMiLAogICAgICAgICAgICAgICAgIGlmZWxz
ZSh3b3JkcyRwcmVzICVpbiUgcmVwdWJsaWNhbnMsICJSZXB1YmxpY2FuIiwgIk90aGVyIikpCnRvcGljcyA8LSB3b3JkcyAlPiUgbXV0YXRlKHRvcGljID0gc3RyX2MoInRfIiwgLnRvcGljKSkgJT4lCiAgICAgICAgICAgIGdyb3VwX2J5KHllYXIsIHRvcGljLCBwYXJ0eSkgJT4lIHN1bW1hcml6ZSh3b3JkcyA9IHN1bShjb3VudCkpICU+JQogICAgICAgICAgICBzcHJlYWQodG9waWMsIHdvcmRzKQp0b3BpY3NbaXMubmEodG9waWNzKV0gPC0gMAp0cmFpbiA8LSB0b3BpY3NbdG9waWNzJHBhcnR5ICE9ICJPdGhlciIsIF0KY2xhc3MgPC0gdG9waWNzW3RvcGljcyRwYXJ0eSA9PSAiT3RoZXIiLCBdCm1vZGVsIDwtIHN2bSh0cmFpblsgLCAzOm5jb2wodHJhaW4pXSwgZmFjdG9yKHRyYWluJHBhcnR5KSkKcHJlZGljdGlvbnMgPC0gc290dSAlPiUgZmlsdGVyKHBhcnR5ID09ICJPdGhlciIpICU+JQogICAgICAgICAgICAgICAgbXV0YXRlKHByZWRpY3Rpb24gPSBwcmVkaWN0KG1vZGVsLCBjbGFzc1sgLCAzOm5jb2woY2xhc3MpXSkpCnRhYmxlKHByZWRpY3Rpb25zJHByZXMsIHByZWRpY3Rpb25zJHByZWRpY3Rpb24pCmBgYApMZXQncyB0ZXN0IHRoZSBtb2RlbCB1c2luZyBsZWF2ZS1vbmUtb3V0IGNyb3NzLXZhbGlkYXRpb24uCmBgYHtyfQpwcmVkaWN0aW9ucyA8LSBkYXRhLmZyYW1lKCkKZm9yKGkgaW4gMTpucm93KHRyYWluKSkgewogIG1vZGVsIDwtIHN2bSh0cmFpblstaSwgMzpuY29sKHRyYWluKV0sIGZhY3Rvcih0cmFpblstaSwgXSRwYXJ0eSkpCiAgcHJlZCA8LSBwcmVkaWN0KG1vZGVsLCB0cmFpbltpLCAzOm5jb2wodHJhaW4pXSkKICBwcmVkaWN0IDwtIHRyYWluW2ksIDE6Ml0gJT4lIHVuZ3JvdXAgJT4lIG11dGF0ZShwcmVkaWN0aW9uID0gcHJlZCkKICBwcmVkaWN0aW9ucyA8LSByYmluZChwcmVkaWN0aW9ucywgcHJlZGljdCkKfQp0YWJsZShwcmVkaWN0aW9ucyRwYXJ0eSwgcHJlZGljdGlvbnMkcHJlZGljdGlvbikKYGBgCgo=