I have a NodeJS server responsible for streaming data from an API and pushing the data to a local TCP port, 8080, on which Apache Spark is listening.
// Streams the body of an HTTP response into a local TCP connection.
//
// This script is a TCP *client*: it connects to 127.0.0.1:8080 and therefore
// needs some other process to be listening on that port before it starts.
// NOTE(review): Spark's socketTextStream is documented as connecting to
// host:port as well, so two clients attached to one listener will not see
// each other's data unless the listener relays it - confirm the topology.
const net = require('net');
const { pipeline } = require('stream');
const axios = require('axios');

const client = new net.Socket();

// Without an 'error' listener a refused or reset connection is raised as an
// uncaught exception and terminates the process with no useful message.
client.on('error', err => {
  console.error('TCP socket error:', err.message);
});

client.on('connect', async () => {
  try {
    const res = await axios.get('https://api.co.za', {
      responseType: 'stream',
    });

    // pipeline() respects backpressure (it pauses the HTTP stream while the
    // socket buffer is full), forwards errors from either side to the
    // callback, and closes both streams when one of them ends or fails.
    pipeline(res.data, client, err => {
      if (err) {
        console.error('Streaming to TCP socket failed:', err.message);
      }
    });
  } catch (err) {
    // The HTTP request itself failed; release the socket instead of leaving
    // an idle connection open.
    console.error('HTTP request failed:', err.message);
    client.destroy();
  }
});

// Register handlers first, then initiate the connection.
client.connect(8080, '127.0.0.1');
Then Apache Spark attempts to read data from that port.
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.{ SparkConf, SparkContext }
/** Spark Streaming job that prints text lines received from a TCP source on 127.0.0.1:8080. */
object DataStream {

  /** Master URL used when neither the environment nor Spark's own configuration supplies one. */
  private val DefaultMaster = "local[*]"

  /**
    * Builds the streaming context, attaches a socket text stream and blocks until termination.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val baseConfig = new SparkConf().setAppName("Data Stream")

    // An explicit "spark.master" environment variable still wins, as before. Otherwise keep any
    // master already loaded into the SparkConf (e.g. from `spark-submit --master`) and fall back
    // to local mode only when nothing was configured, instead of overriding it unconditionally.
    val sparkConfig = sys.env
      .get("spark.master")
      .fold(baseConfig.setIfMissing("spark.master", DefaultMaster))(baseConfig.setMaster)

    val sparkContext = new SparkContext(sparkConfig)
    sparkContext.setLogLevel("ERROR")

    // One-second micro-batches.
    val streamingContext = new StreamingContext(sparkContext, Seconds(1))

    // NOTE(review): socketTextStream is documented as connecting to host:port, so a process must
    // already be listening on 8080 and writing newline-terminated text - confirm the producer
    // is a server, not another client of the same listener.
    val data = streamingContext.socketTextStream("127.0.0.1", 8080)
    data.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
Then I open the port 8080 with netcat: nc -l 8080
Here's my problem: if I start my Node process first, it pushes data to the port, but I do not see Spark reacting to the data. If I start Spark first, my Node process says it's writing, but I can't see data arriving at port 8080.
If I send data directly through netcat after nc -l 8080, Spark has no problems reading it.
Is there some sort of client exclusivity happening with these local ports? Is there an alternative way of opening a port to be used this way?
OS: Ubuntu 19.10