<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?action=history&amp;feed=atom&amp;title=Word_Alignment</id>
	<title>Word Alignment - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?action=history&amp;feed=atom&amp;title=Word_Alignment"/>
	<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;action=history"/>
	<updated>2026-06-03T05:26:16Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.41.0</generator>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=402&amp;oldid=prev</id>
		<title>Tamchyna at 10:07, 25 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=402&amp;oldid=prev"/>
		<updated>2015-03-25T10:07:15Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 10:07, 25 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l51&quot;&gt;Line 51:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 51:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== See Also ==&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== See Also ==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Adam Lopez&#039; [http://lectures.ms.mff.cuni.cz/video/recordshow/index/44/172 lecture] on IBM model 1 and the accompanying [http://ufal.mff.cuni.cz/mtm13/files/05-word-alignment-adam-lopez.pdf slides]&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* &lt;/ins&gt;Adam Lopez&#039; [http://lectures.ms.mff.cuni.cz/video/recordshow/index/44/172 lecture] on IBM model 1 and the accompanying [http://ufal.mff.cuni.cz/mtm13/files/05-word-alignment-adam-lopez.pdf slides]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* Philipp Koehn&#039;s [http://www.statmt.org/book/slides/04-word-based-models.pdf#page=30 slides] (the slides contain the pseudo-code for IBM1)&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== References ==&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== References ==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=401&amp;oldid=prev</id>
		<title>Tamchyna at 10:01, 25 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=401&amp;oldid=prev"/>
		<updated>2015-03-25T10:01:35Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 10:01, 25 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l31&quot;&gt;Line 31:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 31:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &#039;&#039;&#039;expectation step&#039;&#039;&#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &quot;weight&quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &#039;&#039;fractional counts&#039;&#039;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &#039;&#039;&#039;expectation step&#039;&#039;&#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &quot;weight&quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;(because our initial probability is uniform) &lt;/ins&gt;from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &#039;&#039;fractional counts&#039;&#039;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l48&quot;&gt;Line 48:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 48:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* [https://codex3.ms.mff.cuni.cz/codex-trans/?groupId=3&amp;amp;taskId=11&amp;amp;module=groups%2Ftasks&amp;amp;page=specification Implement IBM-1 word alignment]  &lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* [https://codex3.ms.mff.cuni.cz/codex-trans/?groupId=3&amp;amp;taskId=11&amp;amp;module=groups%2Ftasks&amp;amp;page=specification Implement IBM-1 word alignment]  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;== See Also ==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Adam Lopez&#039; [http://lectures.ms.mff.cuni.cz/video/recordshow/index/44/172 lecture] on IBM model 1 and the accompanying [http://ufal.mff.cuni.cz/mtm13/files/05-word-alignment-adam-lopez.pdf slides]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== References ==&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== References ==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=400&amp;oldid=prev</id>
		<title>Tamchyna at 16:46, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=400&amp;oldid=prev"/>
		<updated>2015-03-24T16:46:28Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:46, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l35&quot;&gt;Line 35:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 35:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac\_count&lt;/del&gt;}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac\_count&lt;/del&gt;}(t&#039;, s_i)}&amp;lt;/math&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;count&lt;/ins&gt;}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;count&lt;/ins&gt;}(t&#039;, s_i)}&amp;lt;/math&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=399&amp;oldid=prev</id>
		<title>Tamchyna at 16:45, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=399&amp;oldid=prev"/>
		<updated>2015-03-24T16:45:50Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:45, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l35&quot;&gt;Line 35:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 35:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac_count&lt;/del&gt;}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac_count&lt;/del&gt;}(t&#039;, s_i)}&amp;lt;/math&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac\_count&lt;/ins&gt;}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;frac\_count&lt;/ins&gt;}(t&#039;, s_i)}&amp;lt;/math&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=398&amp;oldid=prev</id>
		<title>Tamchyna at 16:45, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=398&amp;oldid=prev"/>
		<updated>2015-03-24T16:45:22Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:45, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l35&quot;&gt;Line 35:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 35:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone through our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model):&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{frac_count}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{frac_count}(t&#039;, s_i)}&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{frac_count}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{frac_count}(t&#039;, s_i)}&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&amp;lt;/math&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=397&amp;oldid=prev</id>
		<title>Tamchyna at 16:45, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=397&amp;oldid=prev"/>
		<updated>2015-03-24T16:45:09Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:45, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l33&quot;&gt;Line 33:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 33:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model)&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;. The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;through &lt;/ins&gt;our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_{j_1},\ldots,t_{j_n}&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model)&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;:&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&amp;lt;math&amp;gt;P(t_{j_1}|s_i) = \frac{\text{frac_count}(t_{j_1}, s_i)}{\sum_{t&#039;} \text{frac_count}(t&#039;, s_i)}&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution &lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;which is &lt;/ins&gt;globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=== Model Limitations ===&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=== Model Limitations ===&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=396&amp;oldid=prev</id>
		<title>Tamchyna at 16:39, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=396&amp;oldid=prev"/>
		<updated>2015-03-24T16:39:24Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:39, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l33&quot;&gt;Line 33:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 33:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;t_j_1&lt;/del&gt;,\ldots,&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;t_j_n&lt;/del&gt;&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;t_{j_1}&lt;/ins&gt;,\ldots,&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;t_{j_n}&lt;/ins&gt;&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=395&amp;oldid=prev</id>
		<title>Tamchyna at 16:38, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=395&amp;oldid=prev"/>
		<updated>2015-03-24T16:38:52Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:38, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l31&quot;&gt;Line 31:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 31:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &#039;&#039;&#039;expectation step&#039;&#039;&#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &quot;weight&quot; &amp;lt;math&amp;gt;1/T&amp;lt;math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &#039;&#039;fractional counts&#039;&#039;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &#039;&#039;&#039;expectation step&#039;&#039;&#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &quot;weight&quot; &amp;lt;math&amp;gt;1/T&amp;lt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;/&lt;/ins&gt;math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &#039;&#039;fractional counts&#039;&#039;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_j_1,\ldots,t_j_n&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; with some target words &amp;lt;math&amp;gt;t_j_1,\ldots,t_j_n&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &amp;#039;&amp;#039;&amp;#039;maximization step&amp;#039;&amp;#039;&amp;#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=394&amp;oldid=prev</id>
		<title>Tamchyna at 16:38, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=394&amp;oldid=prev"/>
		<updated>2015-03-24T16:38:18Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:38, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l33&quot;&gt;Line 33:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 33:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the &amp;#039;&amp;#039;&amp;#039;expectation step&amp;#039;&amp;#039;&amp;#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &amp;quot;weight&amp;quot; &amp;lt;math&amp;gt;1/T&amp;lt;math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &amp;#039;&amp;#039;fractional counts&amp;#039;&amp;#039;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;math&amp;gt; with some target words &amp;lt;math&amp;gt;t_j_1,\ldots,t_j_n&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;/&lt;/ins&gt;math&amp;gt; with some target words &amp;lt;math&amp;gt;t_j_1,\ldots,t_j_n&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
	<entry>
		<id>https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=393&amp;oldid=prev</id>
		<title>Tamchyna at 16:37, 24 March 2015</title>
		<link rel="alternate" type="text/html" href="https://mttalks.ufal.ms.mff.cuni.cz/index.php?title=Word_Alignment&amp;diff=393&amp;oldid=prev"/>
		<updated>2015-03-24T16:37:39Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 16:37, 24 March 2015&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l30&quot;&gt;Line 30:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 30:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Initially, all of our translation probabilities are uniform.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;In the &#039;&#039;&#039;expectation step&#039;&#039;&#039;, we apply them to our data, i.e. we draw alignment links. Assuming our current sentence contains &amp;lt;math&amp;gt;T&amp;lt;/math&amp;gt; target words, we draw a link of &quot;weight&quot; &amp;lt;math&amp;gt;1/T&amp;lt;math&amp;gt; from each source word to each target word. And for each source word, we remember that we have seen it aligned to each of the target words in the sentence &amp;lt;math&amp;gt;1/T&amp;lt;/math&amp;gt; times. That is, we collect &#039;&#039;fractional counts&#039;&#039;.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Now assume that we have gone our whole corpus. We have seen a given source word &amp;lt;math&amp;gt;s_i&amp;lt;math&amp;gt; with some target words &amp;lt;math&amp;gt;t_j_1,\ldots,t_j_n&amp;lt;/math&amp;gt; some (fractional) number of times. Now in the &#039;&#039;&#039;maximization step&#039;&#039;&#039;, we turn these fractional counts into probabilities simply by normalizing them (this is the optimal thing to do, we could formally derive that this is the [http://en.wikipedia.org/wiki/Maximum_likelihood maximum likelihood estimate] for this model). The words that occurred frequently with our source word &amp;lt;math&amp;gt;s_i&amp;lt;/math&amp;gt; will have a higher fractional count, and therefore a higher translation probability.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-side-deleted&quot;&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;In the next iteration, we apply our newly learned probabilities to our data, collect the fractional counts etc. We can run this algorithm for a limited number of iterations or check for convergence (the conditional probabilities stop changing). In this simple model, we are guaranteed to find a solution globally optimal for our data.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=== Model Limitations ===&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=== Model Limitations ===&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tamchyna</name></author>
	</entry>
</feed>