-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Can't manage to have a string get analysed by brain.js #188
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
Do you have a code sample, perhaps even a jsfiddle? |
I do, it's a Node project so not sure what would be the best way to go about this, but if you know a better way, please let me know and I'll gladly do it! The only difference between this code and mine is that mine uses the twitter API to get the data I provided above. Basically my ideal goal would be for it to be able to separate the tweets by author and then be able to feed it some text for it to make a prediction on who it belongs to.
|
The main problem here is the inputs are going to have varying sizes, which won't train correctly. |
Here is a working prototype: https://jsfiddle.net/8Lvynxz5/36/ |
Here is a better working prototype (the other one I forgot to click save on) https://jsfiddle.net/8Lvynxz5/38/ |
Let me know if that isn't enough of an example to get this right.
|
Thank you very much for your help and pointers. I will have a good look at this tomorrow morning, really looking forward to it! :) |
If I am not mistaken, this step (to encode and fix length of input data) is not required with |
@mubaidr the unfortunate thing is that it seems like I can't use this, since I am running this on Node and as far as I understand that isn't available yet. Currently the app is working while getting data from the twitter API, though for some reason it leans heavily to one side at the moment! It seems like I am very close, but still a few tweaks needed here and there. const dotenv = require('dotenv').config()
const brain = require('brain.js')
const Twitter = require('twitter')
const http = require('http')
const express = require('express')
const app = express()
const socketIO = require('socket.io')
const server = http.createServer(app)
const io = socketIO(server);
const path = require('path')
// Accumulates the { input, output } training samples built from fetched tweets.
let dataTweet = []
// Express configuration: render views with EJS and serve static files from ./public.
app.set('view engine', 'ejs')
const publicPath = path.join(__dirname, './public')
app.use(express.static(publicPath))
// Network that train() trains on the encoded tweets.
let net = new brain.NeuralNetwork();
// Assigned by train() from net.toFunction(); undefined until training has run.
let trainedNet;
// Length of the longest training input; assigned by getTrainingData() and read by adjustSize().
let longest;
// NOTE(review): params and paramsObama are not referenced by the code shown here —
// getTweets() builds its own params object. Confirm whether they are still needed.
const params = { screen_name: 'realdonaldtrump', count: 10, result_type: 'recent', tweet_mode: 'extended' };
const paramsObama = { screen_name: 'barackobama', count: 10, result_type: 'recent', tweet_mode: 'extended' };
// Twitter credentials, read from environment variables.
// NOTE(review): the variable names mix upper- and lower-case (CONSUMERKEY vs consumer_secret) —
// confirm they match the names used in the .env file.
const tokens = {
consumer_key: process.env.CONSUMERKEY,
consumer_secret: process.env.consumer_secret,
access_token: process.env.access_token,
access_token_key: process.env.access_token_key,
access_token_secret: process.env.access_token_secret
}
// Twitter API client used by getTweets().
const client = new Twitter(tokens)
/**
 * Request a user's timeline through the shared Twitter client.
 *
 * The client's own promise is returned instead of being wrapped in a new
 * Promise, so a failed request rejects (and reaches the caller's `.catch`)
 * rather than leaving the caller waiting forever.
 *
 * @param {string} user - Screen name whose timeline is requested.
 * @returns {Promise<Array>} Resolves with the data returned by
 *   `client.get('statuses/user_timeline', ...)`, or with an empty array when
 *   the response is falsy. Rejects when the request fails.
 */
function getTweets(user) {
  const params = { screen_name: `${user}`, count: 500, result_type: 'recent', tweet_mode: 'extended' };
  console.log(params)
  // A falsy response becomes an empty list so callers can always iterate it.
  return client.get('statuses/user_timeline', params).then((data) => data || []);
}
// Convert each fetched tweet into a labelled training sample and append it to
// dataTweet: the input is the tweet text up to the first 'https:' link, the
// output is keyed by the first word of the author's display name.
const addTweets = (data) => {
  data.forEach((tweet) => {
    dataTweet.push({
      input: tweet.full_text.split('https:')[0],
      output: { [tweet.user.name.split(' ')[0]]: 1 }
    })
  })
};

// Fetch both timelines one after the other, then train and print two sample
// predictions. Every step is returned into a single chain so that a failure in
// the second request or during training also reaches the final catch.
getTweets('realdonaldtrump')
  .then(addTweets)
  .then(() => getTweets('barackobama'))
  .then(addTweets)
  .then(() => {
    train(getTrainingData(dataTweet))
    console.log(trainedNet(encode(adjustSize('A TOTAL WITCH HUNT!!!'))));
    console.log(trainedNet(encode(adjustSize('Incredible to have a Chicago team in the Final Four. I’ll take that over an intact bracket any day! Congratulations to everybody @loyolachicago - let’s keep it going!'))));
  })
  .catch((e) => {
    console.log(e)
  })
// console.log(trainedNet(encode(adjustSize('Last night, it was my great honor to host America’s senior defense and military leaders for dinner at the White House. America’s military is the GREATEST fighting force in the history of the world. They all have my pledge of unwavering commitment to our men and women in uniform! '))));
// console.log(trainedNet(encode(adjustSize('Incredible to have a Chicago team in the Final Four. I’ll take that over an intact bracket any day! Congratulations to everybody @loyolachicago - let’s keep it going!'))));
/**
 * Train the shared network on the given samples and store its compiled
 * standalone function in `trainedNet`.
 *
 * @param {Array<{input: string, output: Object}>} data - Samples whose inputs
 *   are still text; processTrainingData() encodes them before training.
 */
function train(data) {
  const trainingOptions = {
    iterations: 2000,
    log: true,
    learningRate: 0.1,
    timeout: 5000
  };
  const samples = processTrainingData(data);
  net.train(samples, trainingOptions);
  trainedNet = net.toFunction();
}
/**
 * Encode text as one number per UTF-16 code unit for use as network input.
 *
 * Each code unit is divided by 400 and capped at 1. Without the cap, any
 * character above code 400 — for example the typographic apostrophe ’
 * (code 8217), which appears in the sample tweets — would produce a value
 * around 20, far outside the 0–1 range the network inputs are meant to use.
 *
 * @param {string} arg - Text to encode.
 * @returns {number[]} One value in [0, 1] per code unit of `arg`.
 */
function encode(arg) {
  return arg.split('').map(x => Math.min(x.charCodeAt(0) / 400, 1));
}
/**
 * Build the numeric training set: every sample keeps its output and has its
 * text input replaced by the result of encode().
 *
 * @param {Array<{input: string, output: Object}>} data - Text samples.
 * @returns {Array<{input: number[], output: Object}>} New array of encoded samples.
 */
function processTrainingData(data) {
  return data.map((sample) => ({
    input: encode(sample.input),
    output: sample.output
  }));
}
/**
 * Pad every sample's input text to the length of the longest one.
 *
 * Records that length in the module-level `longest`, then rewrites each
 * sample's `input` in place via adjustSize(). The same array that was passed
 * in is returned.
 *
 * @param {Array<{input: string, output: Object}>} data - Samples to equalise.
 * @returns {Array<{input: string, output: Object}>} `data`, with padded inputs.
 */
function getTrainingData(data) {
  // Seeding with 0 lets an empty data set yield longest = 0 instead of the
  // TypeError that reduce() throws on an empty array with no initial value.
  longest = data.reduce((max, sample) => Math.max(max, sample.input.length), 0);
  data.forEach((sample) => {
    sample.input = adjustSize(sample.input);
  });
  return data;
}
/**
 * Fit text to exactly `longest` characters so every network input has the
 * same width: shorter text is padded with spaces, longer text is cut off.
 *
 * Cutting matters for prediction: text longer than anything seen in training
 * would otherwise encode to more values than the network was trained on.
 *
 * @param {string} string - Text to resize.
 * @returns {string} Text of length `longest`, or `string` unchanged while
 *   `longest` has not been set yet.
 */
function adjustSize(string) {
  // `longest` is only assigned once getTrainingData() has run.
  if (typeof longest !== 'number') {
    return string;
  }
  let sized = string.slice(0, longest);
  while (sized.length < longest) {
    sized += ' ';
  }
  return sized;
}
Api is same whether you use in browser or node.js. I am currently on mobile, will look into code later. |
The api is the same in whatever javascript you are on. I started experimenting with a lstm version, which I believe is the answer here, I'll see what I can come up with as well. |
In case you would like to see where I am at right now, here is a link to the repo: https://github.com/moniac/real-time-web Thanks for the help already, it's really exciting to see this develop! |
It has been a while since I looked at the api (even though I built it, lol). We really should document stuff like this better but here you go: https://jsfiddle.net/j638LfLd/ Outputs:
Definition: let net = new brain.recurrent.LSTM();
// Train on two labelled sentences; each sample maps input text to an output label.
net.train([
  {
    input: 'I say yes!',
    output: 'positive'
  },
  {
    // The properties must be separated by a comma; a period here is a syntax error.
    input: 'I say no!',
    output: 'negative'
  }
]);
const standaloneFunction = net.toFunction();
// for the curious at heart and want the non-readable version:
console.log(standaloneFunction.toString()); |
What does 'net.toFunction' do? |
It compiles the whole network into a single static function that is only used for the purpose it was trained for. |
Partially fix #188. Add missing api documentation to readme.
I apologize for reopening this issue, but I have run into another problem! Currently, I am trying to check tweets based on a hashtag and compare them to past tweets to see whether they fit with the previous tweets; doing this might let the system recognize spam/trolls. I have tried both the LSTM and the NN, where the LSTM only returns 'gibberish', and the NN basically says any input is good input! Currently the system is looking at tweets that use the hashtag fortnite. Would I need to add another output classified as 'other'? This would be the NN version: let net = new brain.NeuralNetwork()
let trainedNet
let longest
let tweets = []
/**
 * Train the shared network on the given samples and store its compiled
 * standalone function in `trainedNet`.
 *
 * @param {Array<{input: string, output: Object}>} data - Samples whose inputs
 *   are still text; processTrainingData() encodes them before training.
 */
function train(data) {
  const trainingOptions = {
    iterations: 2000,
    log: true,
    learningRate: 0.1,
    timeout: 4000
  }
  const samples = processTrainingData(data)
  net.train(samples, trainingOptions)
  trainedNet = net.toFunction()
}
/**
 * Encode text as one number per UTF-16 code unit for use as network input.
 *
 * Each code unit is divided by 400 and capped at 1. Without the cap, any
 * character above code 400 — for example the typographic apostrophe ’
 * (code 8217) — would produce a value around 20, far outside the 0–1 range
 * the network inputs are meant to use.
 *
 * @param {string} arg - Text to encode.
 * @returns {number[]} One value in [0, 1] per code unit of `arg`.
 */
function encode(arg) {
  return arg.split('').map(x => Math.min(x.charCodeAt(0) / 400, 1))
}
/**
 * Build the numeric training set: every sample keeps its output and has its
 * text input replaced by the result of encode(). The encoded set is logged
 * to the console before it is returned.
 *
 * @param {Array<{input: string, output: Object}>} data - Text samples.
 * @returns {Array<{input: number[], output: Object}>} New array of encoded samples.
 */
function processTrainingData(data) {
  const encodedSamples = data.map((sample) => ({
    input: encode(sample.input),
    output: sample.output
  }))
  console.log(encodedSamples)
  return encodedSamples
}
/**
 * Pad every sample's input text to the length of the longest one.
 *
 * Records that length in the module-level `longest`, then rewrites each
 * sample's `input` in place via adjustSize(). The same array that was passed
 * in is returned, so callers holding the original array see the padded text.
 *
 * @param {Array<{input: string, output: Object}>} data - Samples to equalise.
 * @returns {Array<{input: string, output: Object}>} `data`, with padded inputs.
 */
function getTrainingData(data) {
  // Seeding with 0 lets an empty data set yield longest = 0 instead of the
  // TypeError that reduce() throws on an empty array with no initial value.
  longest = data.reduce((max, sample) => Math.max(max, sample.input.length), 0)
  data.forEach((sample) => {
    sample.input = adjustSize(sample.input)
  })
  return data
}
/**
 * Fit text to exactly `longest` characters so every network input has the
 * same width: shorter text is padded with spaces, longer text is cut off.
 *
 * Cutting matters for prediction: text longer than anything seen in training
 * would otherwise encode to more values than the network was trained on.
 *
 * @param {string} string - Text to resize.
 * @returns {string} Text of length `longest`, or `string` unchanged while
 *   `longest` has not been set yet.
 */
function adjustSize(string) {
  // `longest` is only assigned once getTrainingData() has run.
  if (typeof longest !== 'number') {
    return string
  }
  let sized = string.slice(0, longest)
  while (sized.length < longest) {
    sized += ' '
  }
  return sized
}
var es = new EventSource('/stream')
es.addEventListener('connect', function(event) {
const text = JSON.parse(event.data)
if (text.tweet) {
tweets.push({
input: text.tweet,
output: { fortnite: 1 }
})
}
if (tweets.length === 2) {
console.log(getTrainingData(tweets))
train(tweets)
}
console.log(trainedNet(encode(adjustSize(text.tweet))))
console.log(trainedNet(encode(adjustSize('the legend of zelda'))))
}) And the LSTM version let net = new brain.recurrent.LSTM()
let trainedNet
let longest
let trainingData = []
/**
 * Train the shared network directly on the given samples (no encoding step)
 * and store its compiled standalone function in `trainedNet`.
 *
 * NOTE(review): training runs for a single iteration — confirm that is
 * intended, since the thread reports unusable output from this version.
 *
 * @param {Array<{input: string, output: Object}>} data - Training samples.
 */
function train(data) {
  const trainingOptions = { iterations: 1, log: true }
  net.train(data, trainingOptions)
  trainedNet = net.toFunction()
}
// Subscribe to the server's event stream. Each 'connect' event carries a JSON
// payload; payloads with a tweet are collected as training samples while
// fewer than five are stored. Whenever exactly four samples are stored, the
// network is trained and one sample prediction is logged.
var es = new EventSource('/stream')
es.addEventListener('connect', function(message) {
  const payload = JSON.parse(message.data)
  const hasRoom = trainingData.length < 5
  if (payload.tweet && hasRoom) {
    trainingData.push({
      input: payload.tweet,
      output: { fortnite: 1 }
    })
  }
  if (trainingData.length !== 4) {
    return
  }
  train(getTrainingData())
  console.log(trainedNet('Fortnite is nice!', 1000))
})
// Returns the module-level trainingData array itself (no copy is made).
function getTrainingData() {
return trainingData
} |
Some dummy data const trainingData = [
{
input:
"Fortnite, camo and old shoes: The 'normal' life of Steven Adams - via @ESPN",
output: { fornite: 1 }
},
{
input: 'U can get a scholarship for playing fortnite now? Wooooowwwwww',
output: { fornite: 1 }
},
{
input:
'they’re playing fortnite mobile, clear sign of special needs, they can’t help it :/',
output: { fornite: 1 }
},
{
input: 'I liked a @YouTube video ',
output: { fornite: 1 }
}
] |
What is wrong?
The training network doesn't train, it returns NaN for the data that it should have analysed.
Where does it happen?
In the app.js run through node, after receiving data from the Twitter API
How do we replicate the issue?
How important is this (1-5)?
A 4, since I think this should be able to work, but it could be my bad too!
Expected behavior (i.e. solution)
The data should have been analysed, but something went wrong somewhere in between!
Other Comments
I think the encoding method might not be correct here.
The text was updated successfully, but these errors were encountered: