-
Notifications
You must be signed in to change notification settings - Fork 29k
[WIP] Remove many uses of Thread.sleep() from streaming tests #3687
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
303e828
f8f6c93
6ce0681
5c31b8a
ad0056b
3db335f
b245217
12635b4
1a0fcb9
c81a477
abf5050
bc0db94
ee8c8f8
27c8def
9c939d9
590f006
c9c477f
63162b2
520bade
1304776
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,12 +74,15 @@ class FileInputDStream[K: ClassTag, V: ClassTag, F <: NewInputFormat[K,V] : Clas | |
| newFilesOnly: Boolean = true) | ||
| extends InputDStream[(K, V)](ssc_) { | ||
|
|
||
| // This is a def so that it works during checkpoint recovery: | ||
| private def clock = ssc.scheduler.clock | ||
|
|
||
| // Data to be saved as part of the streaming checkpoints | ||
| protected[streaming] override val checkpointData = new FileInputDStreamCheckpointData | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I approve this change, but it should probably be a separate PR that touches only this input stream and its tests. |
||
| // Initial ignore threshold based on which old, existing files in the directory (at the time of | ||
| // starting the streaming application) will be ignored or considered | ||
| private val initialModTimeIgnoreThreshold = if (newFilesOnly) System.currentTimeMillis() else 0L | ||
| private val initialModTimeIgnoreThreshold = if (newFilesOnly) clock.currentTime() else 0L | ||
|
|
||
| /* | ||
| * Make sure that the information of files selected in the last few batches are remembered. | ||
|
|
@@ -151,7 +154,7 @@ class FileInputDStream[K: ClassTag, V: ClassTag, F <: NewInputFormat[K,V] : Clas | |
| */ | ||
| private def findNewFiles(currentTime: Long): Array[String] = { | ||
| try { | ||
| lastNewFileFindingTime = System.currentTimeMillis | ||
| lastNewFileFindingTime = clock.currentTime() | ||
|
|
||
| // Calculate ignore threshold | ||
| val modTimeIgnoreThreshold = math.max( | ||
|
|
@@ -164,7 +167,7 @@ class FileInputDStream[K: ClassTag, V: ClassTag, F <: NewInputFormat[K,V] : Clas | |
| def accept(path: Path): Boolean = isNewFile(path, currentTime, modTimeIgnoreThreshold) | ||
| } | ||
| val newFiles = fs.listStatus(directoryPath, filter).map(_.getPath.toString) | ||
| val timeTaken = System.currentTimeMillis - lastNewFileFindingTime | ||
| val timeTaken = clock.currentTime() - lastNewFileFindingTime | ||
| logInfo("Finding new files took " + timeTaken + " ms") | ||
| logDebug("# cached file times = " + fileToModTime.size) | ||
| if (timeTaken > slideDuration.milliseconds) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,9 +59,11 @@ class SystemClock() extends Clock { | |
| private[streaming] | ||
| class ManualClock() extends Clock { | ||
|
|
||
| var time = 0L | ||
| private var time = 0L | ||
|
|
||
| def currentTime() = time | ||
| def currentTime() = this.synchronized { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I approve this. Please put this in the same PR as the file stream fix. |
||
| time | ||
| } | ||
|
|
||
| def setTime(timeToSet: Long) = { | ||
| this.synchronized { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -639,7 +639,7 @@ class BasicOperationsSuite extends TestSuiteBase { | |
| if (rememberDuration != null) ssc.remember(rememberDuration) | ||
| val output = runStreams[(Int, Int)](ssc, cleanupTestInput.size, numExpectedOutput) | ||
| val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] | ||
| assert(clock.time === Seconds(10).milliseconds) | ||
| assert(clock.currentTime() === Seconds(10).milliseconds) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please put this in the same PR as the file stream fix. |
||
| assert(output.size === numExpectedOutput) | ||
| operatedStream | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am a little uneasy with this approach because it's not clean. Ideally, nothing in Spark should refer to the requirements of higher-level libraries like Spark Streaming.