Update
This commit is contained in:
parent
b6b5090c4c
commit
8312cd92c6
13 changed files with 139 additions and 113 deletions
|
@ -61,93 +61,41 @@ unsigned char charFollowers[256][N];
|
|||
|
||||
int main(void)
|
||||
{
|
||||
srand(SEED);
|
||||
srand(SEED);
|
||||
|
||||
for (int i = 0; i < 256; ++i)
|
||||
for (int j = 0; j < N; ++j)
|
||||
charFollowers[i][j] = ' ';
|
||||
for (int i = 0; i < 256; ++i)
|
||||
for (int j = 0; j < N; ++j)
|
||||
charFollowers[i][j] = ' ';
|
||||
|
||||
unsigned char prevChar = 0;
|
||||
unsigned char prevChar = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int c = getchar();
|
||||
while (1)
|
||||
{
|
||||
int c = getchar();
|
||||
|
||||
if (c == EOF)
|
||||
break;
|
||||
if (c == EOF)
|
||||
break;
|
||||
|
||||
#if IGNORE_NEWLINES
|
||||
if (c == '\n')
|
||||
c = ' ';
|
||||
if (c == '\n')
|
||||
c = ' ';
|
||||
#endif
|
||||
|
||||
charFollowers[prevChar][rand() % N] = c; // put char at random place
|
||||
prevChar = c;
|
||||
}
|
||||
charFollowers[prevChar][rand() % N] = c; // put char at random place
|
||||
prevChar = c;
|
||||
}
|
||||
|
||||
prevChar = ' ';
|
||||
prevChar = ' ';
|
||||
|
||||
for (int j = 0; j < OUTPUT_LEN; ++j) // now generate the output
|
||||
{
|
||||
prevChar = charFollowers[prevChar][rand() % N]; // take random follower
|
||||
putchar(prevChar);
|
||||
}
|
||||
for (int j = 0; j < OUTPUT_LEN; ++j) // now generate the output
|
||||
{
|
||||
prevChar = charFollowers[prevChar][rand() % N]; // take random follower
|
||||
putchar(prevChar);
|
||||
}
|
||||
|
||||
puts("\n");
|
||||
return 0;
|
||||
puts("\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Trying it out on the text of [this wiki](LRS_wiki.md) may output something like this:
|
||||
|
||||
```
|
||||
Ther thellialy oris threstpl/pifragmediaragmagoiby s agmexes, den
|
||||
atss pititpenaraly d thiplio s ts gs, tis wily NU gmarags gos
|
||||
aticel/EEECTherixed atstixedells, s s ores agolltixes tixe. TO: N
|
||||
s, s, TOpedatssth NUCAPorag: puffrits, pillly ars agmen No tpix abe
|
||||
aghe. aragmed ssh titixen plioix ag: Th tingoras TOD s wicipixe d
|
||||
tpllifr.edarenexeramed Thecospix ts ts s osth s pes ovipingor
|
||||
g: agors agass s TOnamand s aghech th wopipistalioiaris agontibuf
|
||||
ally Thrixtply tiaceca th oul/EEEEEEEECPU), wicth NU athed wen
|
||||
aragag athichipl Thechixthass s gmelliptilicex th ostunth gmagh
|
||||
atictpixe. ar Th on wipixexepifrag gman g: sthabopl/te.
|
||||
```
|
||||
|
||||
We see at first glance it looks a bit like English text, even with some quirks specific to this wiki, for example here and there having FULL CAPS words (due to acronyms and also rants that often appear here). It even generated the word "CPU". Notice the algorithm correctly learned punctuation, i.e. it knows that after commas and periods there is almost always a space and after a space there is usually not another space. For comparison here is a Spanish-like text generated from Don Quixote (with accents removed):
|
||||
|
||||
```
|
||||
Diloma Dadro hacaci gua usta lesano strore sto do diaco; ma ro
|
||||
hiciso stue ue dita. do que menotamalmeci ma quen do gue lo;
|
||||
denestajo qucos rdo horor Da que qunca. quadombuce que queromiderbre
|
||||
hera ha rlabue F de querdos Dio macino; dombidrompo mi ste derdiba
|
||||
l, mbiolo Ferbes l ste s lolo que ha Du hano quenore Dio ueno que
|
||||
hala F uano he Dorame de qus rl, ha didesa que halanora Fla quco
|
||||
dil qucio ue do mestostaloste hados de gusta querana. stuce F s s
|
||||
Do lo dre s hal Fro gue sa sa. la sido la dico; hado mbuno Do.
|
||||
mororo; rdenaja. qunolole Diba. do. Fa gor stamestamo ha quno
|
||||
unostabro quero mue s Diado Didota. quencoralor dio sotomo Fuen
|
||||
que halora. gunore quabrbe rol gostuno hadolmbe Da que unendor
|
||||
que le di so; qunta rajos s F de qucol
|
||||
```
|
||||
|
||||
We see some shorter words like *lo*, *le*, *de*, *he*, *que* and *sido* are real Spanish words. Though punctuation is quite nice, the algorithm fails to learn that after a period the word of the next sentence should start with a capital letter (it only does so sometimes by pure chance) -- this is due to the algorithm only seeing one character back; after a period there is also one space which already makes the algorithm forget about the period. This would be addressed by keeping a longer history, as said above. Now let's try a different kind of text altogether: let's try to feed in the source code of [Anarch](anarch.md):
|
||||
|
||||
```
|
||||
2 camechererea = 20;
|
||||
#erereppon.xereponioightFuaighe16_ARABEIUnst
|
||||
chtreraySqua->rarepL_RCL_CL_PE;
|
||||
caminsin.yDINeramaxer = costRCL_PERCL_ditsins->pL_ime1
|
||||
= 0;
|
||||
|
||||
* = RCL_dime1,y 1)
|
||||
0;
|
||||
}
|
||||
}
|
||||
ck;
|
||||
camererayDimameaxSqua ca = ca->ra caininin.xS_UAME;
|
||||
caminstFua-> 0 0;
|
||||
} ca->ponstramiomereaxSquts chts 154;
|
||||
1)
|
||||
```
|
||||
|
||||
Here it's pretty clear the code won't work but its structure really does resemble the original source: curly brackets and semicolons are correctly followed by newlines, assignments look pretty correct as well, dereference arrows (`->`) appear too -- the code even generated the `RCL_` prefix of the [raycastlib](raycastlib.md) functions that's widely seen in the original code.
|
Loading…
Add table
Add a link
Reference in a new issue